
% Yu (2020): GOSemSim usage chapter, Methods in Molecular Biology vol. 2117.
@article{yu_gosemsim_2020,
  author       = {Yu, Guangchuang},
  title        = {Gene Ontology Semantic Similarity Analysis Using {GOSemSim}},
  journaltitle = {Methods in Molecular Biology (Clifton, N.J.)},
  shortjournal = {Methods Mol. Biol.},
  date         = {2020},
  volume       = {2117},
  pages        = {207--215},
  issn         = {1940-6029},
  doi          = {10.1007/978-1-0716-0301-7_11},
  pmid         = {31960380},
  keywords     = {Functional prediction, Gene ontology, {GOSemSim}, Reproducible research, Semantic similarity},
  abstract     = {The {GOSemSim} package, an R-based tool within the Bioconductor project, offers several methods based on information content and graph structure for measuring semantic similarity among {GO} terms, gene products and gene clusters. In this chapter, I illustrate the use of {GOSemSim} on a list of regulators in preimplantation embryos. A step-by-step analysis was provided as well as instructions on interpretation and visualization of the results. {GOSemSim} is open-source and is available from https://www.bioconductor.org/packages/{GOSemSim} .},
}


% Yu et al. (2015), Bioinformatics 31(4): the DOSE package.
% The url field points at the publisher host directly, not at a library proxy.
@article{yu_dose_2015,
	title = {{DOSE}: an {R}/{Bioconductor} package for disease ontology semantic and enrichment analysis},
	volume = {31},
	issn = {1367-4803, 1460-2059},
	url = {http://bioinformatics.oxfordjournals.org/content/31/4/608},
	doi = {10.1093/bioinformatics/btu684},
	shorttitle = {{DOSE}},
	abstract = {Summary: Disease ontology ({DO}) annotates human genes in the context of disease. {DO} is important annotation in translating molecular findings from high-throughput data to clinical relevance. {DOSE} is an R package providing semantic similarity computations among {DO} terms and genes which allows biologists to explore the similarities of diseases and of gene functions in disease perspective. Enrichment analyses including hypergeometric model and gene set enrichment analysis are also implemented to support discovering disease associations of high-throughput biological data. This allows biologists to verify disease relevance in a biological experiment and identify unexpected disease associations. Comparison among gene clusters is also supported.
Availability and implementation: {DOSE} is released under Artistic-2.0 License. The source code and documents are freely available through Bioconductor (http://www.bioconductor.org/packages/release/bioc/html/{DOSE}.html).
Supplementary information: Supplementary data are available at Bioinformatics online.
Contact: gcyu@connect.hku.hk or tqyhe@jnu.edu.cn},
	pages = {608--609},
	number = {4},
	journaltitle = {Bioinformatics},
	shortjournal = {Bioinformatics},
	author = {Yu, Guangchuang and Wang, Li-Gen and Yan, Guang-Rong and He, Qing-Yu},
	urldate = {2015-02-13},
	date = {2015-02-15},
	langid = {english}
}

% Yu et al. (2012), OMICS 16(5): the clusterProfiler package.
% Case protection wraps whole words only; punctuation stays outside the braces.
@article{yu2012,
	title = {{clusterProfiler}: an {R} Package for Comparing Biological Themes Among Gene Clusters},
	volume = {16},
	issn = {1536-2310, 1557-8100},
	shorttitle = {{clusterProfiler}},
	url = {http://online.liebertpub.com/doi/abs/10.1089/omi.2011.0118},
	doi = {10.1089/omi.2011.0118},
	number = {5},
	urldate = {2012-05-05},
	journal = {{OMICS}: A Journal of Integrative Biology},
	author = {Yu, Guangchuang and Wang, Li-Gen and Han, Yanyan and He, Qing-Yu},
	month = may,
	year = {2012},
	pages = {284--287},
	file = {2012-OMICS-clusterProfiler.pdf:/Volumes/YGC/MyZotero/storage/3ZEKB39T/2012-OMICS-clusterProfiler.pdf:application/pdf;clusterProfiler: an R Package for Comparing Biological Themes Among Gene Clusters | Abstract:/Volumes/YGC/MyZotero/storage/WPTJGAT7/omi.2011.html:text/html}
}

% Yu and He (2016), Molecular BioSystems 12(2): the ReactomePA package.
% The url field points at the publisher host directly, not at a library proxy.
@article{yu_reactomepa_2016,
	title = {{ReactomePA}: an {R}/{Bioconductor} package for reactome pathway analysis and visualization},
	volume = {12},
	issn = {1742-2051},
	url = {http://pubs.rsc.org/en/content/articlelanding/2016/mb/c5mb00663e},
	doi = {10.1039/C5MB00663E},
	shorttitle = {{ReactomePA}},
	abstract = {Reactome is a manually curated pathway annotation database for unveiling high-order biological pathways from high-throughput data. {ReactomePA} is an R/Bioconductor package providing enrichment analyses, including hypergeometric test and gene set enrichment analyses. A functional analysis can be applied to the genomic coordination obtained from a sequencing experiment to analyze the functional significance of genomic loci including cis-regulatory elements and non-coding regions. Comparison among different experiments is also supported. Moreover, {ReactomePA} provides several visualization functions to produce highly customizable, publication-quality figures. The source code and documents of {ReactomePA} are freely available through Bioconductor (http://www.bioconductor.org/packages/{ReactomePA}).},
	pages = {477--479},
	number = {2},
	journaltitle = {Molecular {BioSystems}},
	shortjournal = {Mol. {BioSyst}.},
	author = {Yu, Guangchuang and He, Qing-Yu},
	urldate = {2016-02-17},
	date = {2016-01-26},
	langid = {english}
}


% Xu et al. (2021), Molecular Biology and Evolution 38(9): the ggtreeExtra package.
% Only the camelCase package name is case-protected in the title; the page range uses an en dash.
@article{ggtreeExtra_2021,
    author = {Xu, Shuangbin and Dai, Zehan and Guo, Pingfan and Fu, Xiaocong and Liu, Shanshan and Zhou, Lang and Tang, Wenli and Feng, Tingze and Chen, Meijun and Zhan, Li and Wu, Tianzhi and Hu, Erqiang and Jiang, Yong and Bo, Xiaochen and Yu, Guangchuang},
    title = {{ggtreeExtra}: Compact Visualization of Richly Annotated Phylogenetic Data},
    journal = {Molecular Biology and Evolution},
    volume = {38},
    number = {9},
    pages = {4039--4042},
    year = {2021},
    month = jun,
    abstract = "{We present the ggtreeExtra package for visualizing heterogeneous data with a phylogenetic tree in a circular or rectangular layout (https://www.bioconductor.org/packages/ggtreeExtra). The package supports more data types and visualization methods than other tools. It supports using the grammar of graphics syntax to present data on a tree with richly annotated layers and allows evolutionary statistics inferred by commonly used software to be integrated and visualized with external data. GgtreeExtra is a universal tool for tree data visualization. It extends the applications of the phylogenetic tree in different disciplines by making more domain-specific data to be available to visualize and interpret in the evolutionary context.}",
    issn = {0737-4038},
    doi = {10.1093/molbev/msab166},
    url = {https://doi.org/10.1093/molbev/msab166},
    eprint = {https://academic.oup.com/mbe/article-pdf/38/9/4039/39882875/msab166.pdf},
}


% Xu et al. (2021), Frontiers in Genetics 12: the ggbreak package.
% Every author is written in "Last, First" form so the family names parse unambiguously.
@article{ggbreak,
    title = {Use {ggbreak} to effectively utilize plotting space to deal with large datasets and outliers},
    author = {Xu, Shuangbin and Chen, Meijun and Feng, Tingze and Zhan, Li and Zhou, Lang and Yu, Guangchuang},
    year = {2021},
    journal = {Frontiers in Genetics},
    volume = {12},
    pages = {774846},
    doi = {10.3389/fgene.2021.774846},
}


% Yu (2020), Current Protocols in Bioinformatics 69(1): ggtree protocols.
@article{yu_cp_2020,
  author       = {Yu, Guangchuang},
  title        = {Using ggtree to Visualize Data on Tree-Like Structures},
  journaltitle = {Current Protocols in Bioinformatics},
  date         = {2020},
  volume       = {69},
  number       = {1},
  pages        = {e96},
  issn         = {1934-340X},
  doi          = {10.1002/cpbi.96},
  url          = {https://currentprotocols.onlinelibrary.wiley.com/doi/abs/10.1002/cpbi.96},
  urldate      = {2020-03-06},
  rights       = {© 2020 John Wiley \& Sons, Inc.},
  langid       = {english},
  keywords     = {grammar of graphics, phylogeny, tree associated data, tree structure, visualization},
  abstract     = {Ggtree is an R/Bioconductor package for visualizing tree-like structures and associated data. After 5 years of continual development, ggtree has been evolved as a package suite that contains treeio for tree data input and output, tidytree for tree data manipulation, and ggtree for tree data visualization. Ggtree was originally designed to work with phylogenetic trees, and has been expanded to support other tree-like structures, which extends the application of ggtree to present tree data in other disciplines. This article contains five basic protocols describing how to visualize trees using the grammar of graphics syntax, how to visualize hierarchical clustering results with associated data, how to estimate bootstrap values and visualize the values on the tree, how to estimate continuous and discrete ancestral traits and visualize ancestral states on the tree, and how to visualize a multiple sequence alignment with a phylogenetic tree. The ggtree package is freely available at https://www.bioconductor.org/packages/ggtree. © 2020 by John Wiley \& Sons, Inc. Basic Protocol 1: Using grammar of graphics for visualizing trees Basic Protocol 2: Visualizing hierarchical clustering using ggtree Basic Protocol 3: Visualizing bootstrap values as symbolic points Basic Protocol 4: Visualizing ancestral status Basic Protocol 5: Visualizing a multiple sequence alignment with a phylogenetic tree},
}

% Wang et al. (2020), Molecular Biology and Evolution 37(2): the treeio package.
% NOTE(review): the abstract field looks like a truncated scrape; confirm against the publisher page.
@article{wang_treeio_2020,
	title = {Treeio: An {R} Package for Phylogenetic Tree Input and Output with Richly Annotated and Associated Data},
	volume = {37},
	issn = {0737-4038},
	url = {https://academic.oup.com/mbe/article/37/2/599/5601621},
	doi = {10.1093/molbev/msz240},
	shorttitle = {Treeio},
	abstract = {Abstract.  Phylogenetic trees and data are often stored in incompatible and inconsistent formats. The outputs of software tools that contain trees with analysis},
	pages = {599--603},
	number = {2},
	journaltitle = {Molecular Biology and Evolution},
	shortjournal = {Mol Biol Evol},
	author = {Wang, Li-Gen and Lam, Tommy Tsan-Yuk and Xu, Shuangbin and Dai, Zehan and Zhou, Lang and Feng, Tingze and Guo, Pingfan and Dunn, Casey W. and Jones, Bradley R. and Bradley, Tyler and Zhu, Huachen and Guan, Yi and Jiang, Yong and Yu, Guangchuang},
	urldate = {2020-02-18},
	date = {2020-02-01},
	langid = {english}
}

% Yu et al. (2018), Molecular Biology and Evolution 35(12): mapping associated data onto trees with ggtree.
@article{yu_two_2018,
  author       = {Yu, Guangchuang and Lam, Tommy Tsan-Yuk and Zhu, Huachen and Guan, Yi},
  title        = {Two Methods for Mapping and Visualizing Associated Data on Phylogeny Using Ggtree},
  journaltitle = {Molecular Biology and Evolution},
  shortjournal = {Mol Biol Evol},
  date         = {2018-12-01},
  volume       = {35},
  number       = {12},
  pages        = {3041--3043},
  issn         = {0737-4038},
  doi          = {10.1093/molbev/msy194},
  url          = {https://academic.oup.com/mbe/article/35/12/3041/5142656},
  urldate      = {2019-01-03},
  langid       = {english},
  abstract     = {Abstract.  Ggtree is a comprehensive R package for visualizing and annotating phylogenetic trees with associated data. It can also map and visualize associated},
}

% Yu et al. (2017), Methods in Ecology and Evolution 8(1): the ggtree package.
% The url field points at the publisher host directly, not at a library proxy.
@article{yu_ggtree:_2017,
	title = {ggtree: an {R} package for visualization and annotation of phylogenetic trees with their covariates and other associated data},
	volume = {8},
	issn = {2041-210X},
	shorttitle = {ggtree},
	url = {http://onlinelibrary.wiley.com/doi/10.1111/2041-210X.12628/abstract},
	doi = {10.1111/2041-210X.12628},
	language = {en},
	number = {1},
	urldate = {2017-03-07},
	journal = {Methods in Ecology and Evolution},
	author = {Yu, Guangchuang and Smith, David K. and Zhu, Huachen and Guan, Yi and Lam, Tommy Tsan-Yuk},
	month = jan,
	year = {2017},
	keywords = {annotation, bioconductor, Evolution, Phylogeny, r package, visualization},
	pages = {28--36}
}


% Segata et al. (2011), Genome Biology 12(6): metagenomic biomarker discovery.
@article{segata_metagenomic_2011,
  author       = {Segata, Nicola and Izard, Jacques and Waldron, Levi and Gevers, Dirk and Miropolsky, Larisa and Garrett, Wendy S and Huttenhower, Curtis},
  title        = {Metagenomic biomarker discovery and explanation},
  journaltitle = {Genome Biology},
  shortjournal = {Genome Biol},
  date         = {2011},
  volume       = {12},
  number       = {6},
  pages        = {R60},
  issn         = {1465-6906},
  doi          = {10.1186/gb-2011-12-6-r60},
  url          = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3218848/},
  urldate      = {2020-11-25},
  pmid         = {21702898},
  pmcid        = {PMC3218848},
  abstract     = {This study describes and validates a new method for metagenomic biomarker discovery by way of class comparison, tests of biological consistency and effect size estimation. This addresses the challenge of finding organisms, genes, or pathways that consistently explain the differences between two or more microbial communities, which is a central problem to the study of metagenomics. We extensively validate our method on several microbiomes and a convenient online interface for the method is provided at http://huttenhower.sph.harvard.edu/lefse/.},
}

% Escudero and Wendel (2020), New Phytologist 228(3): commentary on chromosomal evolution in angiosperms.
% No note field: the exporter's eprint residue would otherwise print verbatim in the bibliography.
@article{escudero_grand_2020,
	title = {The grand sweep of chromosomal evolution in angiosperms},
	volume = {228},
	issn = {1469-8137},
	url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/nph.16802},
	doi = {10.1111/nph.16802},
	abstract = {This article is a Commentary on Carta et al. (2020), 228: 1097–1106.},
	pages = {805--808},
	number = {3},
	journaltitle = {New Phytologist},
	author = {Escudero, Marcial and Wendel, Jonathan F.},
	urldate = {2021-11-01},
	date = {2020},
	keywords = {angiosperms, chromosome number, dysploidy, genome down-sizing, genome size, karyotype evolution, phylogenetics, polyploidy},
	file = {Full Text PDF:/data/Zotero/storage/86HVD5PJ/Escudero and Wendel - 2020 - The grand sweep of chromosomal evolution in angios.pdf:application/pdf}
}

% Michonneau et al. (2016), Methods in Ecology and Evolution 7(12): the rotl package.
% The language name and the project name are case-protected in the title.
@article{michonneau_rotl:_2016,
	title = {rotl: an {R} package to interact with the {Open Tree of Life} data},
	volume = {7},
	issn = {2041-210X},
	url = {https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.12593},
	doi = {10.1111/2041-210X.12593},
	shorttitle = {rotl},
	abstract = {Summary While phylogenies have been getting easier to build, it has been difficult to reuse, combine and synthesize the information they provide because published trees are often only available as image files, and taxonomic information is not standardized across studies. The Open Tree of Life ({OTL}) project addresses these issues by providing a digital tree that encompasses all organisms, built by combining taxonomic information and published phylogenies. The project also provides tools and services to query and download parts of this synthetic tree, as well as the source data used to build it. Here, we present rotl, an R package to search and download data from the Open Tree of Life directly in R. rotl uses common data structures allowing researchers to take advantage of the rich set of tools and methods that are available in R to manipulate, analyse and visualize phylogenies. Here, and in the vignettes accompanying the package, we demonstrate how rotl can be used with other R packages to analyse biodiversity data. As phylogenies are being used in a growing number of applications, rotl facilitates access to phylogenetic data and allows their integration with statistical methods and data sources available in R.},
	pages = {1476--1481},
	number = {12},
	journaltitle = {Methods in Ecology and Evolution},
	shortjournal = {Methods in Ecology and Evolution},
	author = {Michonneau, François and Brown, Joseph W. and Winter, David J.},
	urldate = {2019-06-05},
	date = {2016-12-01},
	keywords = {phylogenetics, comparative methods, macroevolution, Open Tree of Life}
}


% Chen et al. (2017), Journal of Virology 91(21): evolution and dispersion of HPV58 variants.
% NOTE(review): pages holds the article ID e01285-17 written as a range; confirm how the target style should print it.
@article{chen_ancient_2017,
  author       = {Chen, Zigui and Ho, Wendy C. S. and Boon, Siaw Shi and Law, Priscilla T. Y. and Chan, Martin C. W. and {DeSalle}, Rob and Burk, Robert D. and Chan, Paul K. S.},
  title        = {Ancient Evolution and Dispersion of Human Papillomavirus 58 Variants},
  journaltitle = {Journal of Virology},
  shortjournal = {J. Virol.},
  date         = {2017-11-01},
  volume       = {91},
  number       = {21},
  pages        = {e01285--17},
  issn         = {0022-538X, 1098-5514},
  doi          = {10.1128/JVI.01285-17},
  url          = {http://jvi.asm.org/content/91/21/e01285-17},
  urldate      = {2018-07-10},
  pmid         = {28794033},
  langid       = {english},
  keywords     = {cervical cancer, evolution, {HPV}58, oncogenicity, papillomavirus, virus-host codivergence},
  abstract     = {Human papillomavirus 58 ({HPV}58) is found in 10 to 18\% of cervical cancers in East Asia but is rather uncommon elsewhere. The distribution and oncogenic potential of {HPV}58 variants appear to be heterogeneous, since the E7 T20I/G63S variant is more prevalent in East Asia and confers a 7- to 9-fold-higher risk of cervical precancer and cancer. However, the underlying genomic mechanisms that explain the geographic and carcinogenic diversity of {HPV}58 variants are still poorly understood. In this study, we used a combination of phylogenetic analyses and bioinformatics to investigate the deep evolutionary history of {HPV}58 complete genome variants. The initial splitting of {HPV}58 variants was estimated to occur 478,600 years ago (95\% highest posterior density [{HPD}], 391,000 to 569,600 years ago). This divergence time is well within the era of speciation between Homo sapiens and Neanderthals/Denisovans and around three times longer than the modern Homo sapiens divergence times. The expansion of present-day variants in Eurasia could be the consequence of viral transmission from Neanderthals/Denisovans to non-African modern human populations through gene flow. A whole-genome sequence signature analysis identified 3 amino acid changes, 16 synonymous nucleotide changes, and a 12-bp insertion strongly associated with the E7 T20I/G63S variant that represents the A3 sublineage and carries higher carcinogenetic potential. Compared with the capsid proteins, the oncogenes E7 and E6 had increased substitution rates indicative of higher selection pressure. These data provide a comprehensive evolutionary history and genomic basis of {HPV}58 variants to assist further investigation of carcinogenic association and the development of diagnostic and therapeutic strategies.
{IMPORTANCE} Papillomaviruses ({PVs}) are an ancient and heterogeneous group of double-stranded {DNA} viruses that preferentially infect the cutaneous and mucocutaneous epithelia of vertebrates. Persistent infection by specific oncogenic human papillomaviruses ({HPVs}), including {HPV}58, has been established as the primary cause of cervical cancer. In this work, we reveal the complex evolutionary history of {HPV}58 variants that explains the heterogeneity of oncogenic potential and geographic distribution. Our data suggest that {HPV}58 variants may have coevolved with archaic hominins and dispersed across the planet through host interbreeding and gene flow. Certain genes and codons of {HPV}58 variants representing higher carcinogenic potential and/or that are under positive selection may have important implications for viral host specificity, pathogenesis, and disease prevention.},
  file         = {Full Text PDF:/Volumes/HOME/Zotero/storage/XXBU2AQL/Chen et al. - 2017 - Ancient Evolution and Dispersion of Human Papillom.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/XGGC2RCD/e01285-17.html:text/html},
}


% Lott et al. (2015), Frontiers in Genetics 6: the CoVennTree method.
@article{lott_covenntree:_2015,
  author       = {Lott, Steffen C. and Voß, Björn and Hess, Wolfgang R. and Steglich, Claudia},
  title        = {{CoVennTree}: a new method for the comparative analysis of large datasets},
  shorttitle   = {{CoVennTree}},
  journaltitle = {Frontiers in Genetics},
  shortjournal = {Front Genet},
  date         = {2015},
  volume       = {6},
  pages        = {43},
  issn         = {1664-8021},
  doi          = {10.3389/fgene.2015.00043},
  pmid         = {25750651},
  pmcid        = {PMC4335276},
  keywords     = {{CoVennTree}, massive comparative analysis, rooted tree, {VDS} value, weighted Venn diagram},
  abstract     = {The visualization of massive datasets, such as those resulting from comparative metatranscriptome analyses or the analysis of microbial population structures using ribosomal {RNA} sequences, is a challenging task. We developed a new method called {CoVennTree} (Comparative weighted Venn Tree) that simultaneously compares up to three multifarious datasets by aggregating and propagating information from the bottom to the top level and produces a graphical output in Cytoscape. With the introduction of weighted Venn structures, the contents and relationships of various datasets can be correlated and simultaneously aggregated without losing information. We demonstrate the suitability of this approach using a dataset of 16S {rDNA} sequences obtained from microbial populations at three different depths of the Gulf of Aqaba in the Red Sea. {CoVennTree} has been integrated into the Galaxy {ToolShed} and can be directly downloaded and integrated into the user instance.},
}

% Grubaugh et al. (2017), Nature 546(7658): genomic epidemiology of Zika virus introductions into the United States.
% Generational suffixes use the "Last, Jr, First" form so the family names (Reiner, Kopp) parse correctly.
@article{grubaugh_genomic_2017,
	title = {Genomic epidemiology reveals multiple introductions of {Zika} virus into the {United States}},
	volume = {546},
	rights = {2017 Nature Publishing Group},
	issn = {1476-4687},
	url = {https://www.nature.com/articles/nature22400},
	doi = {10.1038/nature22400},
	abstract = {Zika virus ({ZIKV}) is causing an unprecedented epidemic linked to severe congenital abnormalities. In July 2016, mosquito-borne {ZIKV} transmission was reported in the continental United States; since then, hundreds of locally acquired infections have been reported in Florida. To gain insights into the timing, source, and likely route(s) of {ZIKV} introduction, we tracked the virus from its first detection in Florida by sequencing {ZIKV} genomes from infected patients and Aedes aegypti mosquitoes. We show that at least 4 introductions, but potentially as many as 40, contributed to the outbreak in Florida and that local transmission is likely to have started in the spring of 2016—several months before its initial detection. By analysing surveillance and genetic data, we show that {ZIKV} moved among transmission zones in Miami. Our analyses show that most introductions were linked to the Caribbean, a finding corroborated by the high incidence rates and traffic volumes from the region into the Miami area. Our study provides an understanding of how {ZIKV} initiates transmission in new regions.},
	pages = {401--405},
	number = {7658},
	journaltitle = {Nature},
	author = {Grubaugh, Nathan D. and Ladner, Jason T. and Kraemer, Moritz U. G. and Dudas, Gytis and Tan, Amanda L. and Gangavarapu, Karthik and Wiley, Michael R. and White, Stephen and Thézé, Julien and Magnani, Diogo M. and Prieto, Karla and Reyes, Daniel and Bingham, Andrea M. and Paul, Lauren M. and Robles-Sikisaka, Refugio and Oliveira, Glenn and Pronty, Darryl and Barcellona, Carolyn M. and Metsky, Hayden C. and Baniecki, Mary Lynn and Barnes, Kayla G. and Chak, Bridget and Freije, Catherine A. and Gladden-Young, Adrianne and Gnirke, Andreas and Luo, Cynthia and {MacInnis}, Bronwyn and Matranga, Christian B. and Park, Daniel J. and Qu, James and Schaffner, Stephen F. and Tomkins-Tinch, Christopher and West, Kendra L. and Winnicki, Sarah M. and Wohl, Shirlee and Yozwiak, Nathan L. and Quick, Joshua and Fauver, Joseph R. and Khan, Kamran and Brent, Shannon E. and Reiner, Jr., Robert C. and Lichtenberger, Paola N. and Ricciardi, Michael J. and Bailey, Varian K. and Watkins, David I. and Cone, Marshall R. and Kopp, IV, Edgar W. and Hogan, Kelly N. and Cannons, Andrew C. and Jean, Reynald and Monaghan, Andrew J. and Garry, Robert F. and Loman, Nicholas J. and Faria, Nuno R. and Porcelli, Mario C. and Vasquez, Chalmers and Nagle, Elyse R. and Cummings, Derek A. T. and Stanek, Danielle and Rambaut, Andrew and Sanchez-Lockhart, Mariano and Sabeti, Pardis C. and Gillis, Leah D. and Michael, Scott F. and Bedford, Trevor and Pybus, Oliver G. and Isern, Sharon and Palacios, Gustavo and Andersen, Kristian G.},
	urldate = {2018-07-18},
	date = {2017-06},
	langid = {english}
}


% Kumar et al. (2016), Molecular Biology and Evolution 33(7): the MEGA7 software.
% The product name is case-protected as one whole word.
@article{kumar_mega7_2016,
	title = {{MEGA7}: Molecular Evolutionary Genetics Analysis Version 7.0 for Bigger Datasets},
	volume = {33},
	issn = {1537-1719},
	doi = {10.1093/molbev/msw054},
	shorttitle = {{MEGA7}},
	abstract = {We present the latest version of the Molecular Evolutionary Genetics Analysis (Mega) software, which contains many sophisticated methods and tools for phylogenomics and phylomedicine. In this major upgrade, Mega has been optimized for use on 64-bit computing systems for analyzing larger datasets. Researchers can now explore and analyze tens of thousands of sequences in Mega. The new version also provides an advanced wizard for building timetrees and includes a new functionality to automatically predict gene duplication events in gene family trees. The 64-bit Mega is made available in two interfaces: graphical and command line. The graphical user interface ({GUI}) is a native Microsoft Windows application that can also be used on Mac {OS} X. The command line Mega is available as native applications for Windows, Linux, and Mac {OS} X. They are intended for use in high-throughput and scripted analysis. Both versions are available from www.megasoftware.net free of charge.},
	pages = {1870--1874},
	number = {7},
	journaltitle = {Molecular Biology and Evolution},
	shortjournal = {Mol. Biol. Evol.},
	author = {Kumar, Sudhir and Stecher, Glen and Tamura, Koichiro},
	date = {2016},
	pmid = {27004904},
	keywords = {Algorithms, Biological Evolution, Databases, Genetic, Datasets as Topic, Evolution, Molecular, evolution., gene families, Internet, Phylogeny, Sequence Alignment, Sequence Analysis, software, Software, timetree, User-Computer Interface}
}

% Schliep (2011), Bioinformatics 27(4): the phangorn package.
% The abstract keeps a space between its Summary, Availability, Contact and Supplementary sections.
@article{schliep_phangorn_2011,
	title = {phangorn: phylogenetic analysis in {R}},
	volume = {27},
	url = {http://bioinformatics.oxfordjournals.org/content/27/4/592.abstract},
	doi = {10.1093/bioinformatics/btq706},
	shorttitle = {phangorn},
	abstract = {Summary: phangorn is a package for phylogenetic reconstruction and analysis in the R language. Previously it was only possible to estimate phylogenetic trees with distance methods in R. phangorn, now offers the possibility of reconstructing phylogenies with distance based methods, maximum parsimony or maximum likelihood ({ML}) and performing Hadamard conjugation. Extending the general {ML} framework, this package provides the possibility of estimating mixture and partition models. Furthermore, phangorn offers several functions for comparing trees, phylogenetic models or splits, simulating character data and performing congruence analyses. Availability: phangorn can be obtained through the {CRAN} homepage http://cran.r-project.org/web/packages/phangorn/index.html. phangorn is licensed under {GPL} 2. Contact: klaus.kschliep@snv.jussieu.fr Supplementary information: Supplementary data are available at Bioinformatics online.},
	pages = {592--593},
	number = {4},
	journaltitle = {Bioinformatics},
	author = {Schliep, Klaus Peter},
	urldate = {2011-03-05},
	date = {2011-02-15}
}

% Sanderson (2003), Bioinformatics 19(2): the r8s program.
% The url host is the plain publisher domain, with no trailing dot.
@article{sanderson_r8s:_2003,
	title = {r8s: inferring absolute rates of molecular evolution and divergence times in the absence of a molecular clock},
	volume = {19},
	issn = {1367-4803, 1460-2059},
	shorttitle = {r8s},
	url = {http://bioinformatics.oxfordjournals.org/content/19/2/301},
	doi = {10.1093/bioinformatics/19.2.301},
	language = {en},
	number = {2},
	urldate = {2015-09-07},
	journal = {Bioinformatics},
	author = {Sanderson, Michael J.},
	month = jan,
	year = {2003},
	pmid = {12538260},
	pages = {301--302}
}



% Vos et al. (2012), Systematic Biology 61(4): the NeXML exchange standard.
@article{vos_nexml:_2012,
  author       = {Vos, Rutger A. and Balhoff, James P. and Caravas, Jason A. and Holder, Mark T. and Lapp, Hilmar and Maddison, Wayne P. and Midford, Peter E. and Priyam, Anurag and Sukumaran, Jeet and Xia, Xuhua and Stoltzfus, Arlin},
  title        = {{NeXML}: rich, extensible, and verifiable representation of comparative data and metadata},
  shorttitle   = {{NeXML}},
  journaltitle = {Systematic Biology},
  shortjournal = {Syst. Biol.},
  date         = {2012-07},
  volume       = {61},
  number       = {4},
  pages        = {675--689},
  issn         = {1076-836X},
  doi          = {10.1093/sysbio/sys025},
  pmid         = {22357728},
  pmcid        = {PMC3376374},
  keywords     = {Computational Biology, Software, Models, Biological, Programming Languages, Classification, Informatics, Phylogeny, Biological Evolution, Biodiversity},
  abstract     = {In scientific research, integration and synthesis require a common understanding of where data come from, how much they can be trusted, and what they may be used for. To make such an understanding computer-accessible requires standards for exchanging richly annotated data. The challenges of conveying reusable data are particularly acute in regard to evolutionary comparative analysis, which comprises an ever-expanding list of data types, methods, research aims, and subdisciplines. To facilitate interoperability in evolutionary comparative analysis, we present {NeXML}, an {XML} standard (inspired by the current standard, {NEXUS}) that supports exchange of richly annotated comparative data. {NeXML} defines syntax for operational taxonomic units, character-state matrices, and phylogenetic trees and networks. Documents can be validated unambiguously. Importantly, any data element can be annotated, to an arbitrary degree of richness, using a system that is both flexible and rigorous. We describe how the use of {NeXML} by the {TreeBASE} and Phenoscape projects satisfies user needs that cannot be satisfied with other available file formats. By relying on {XML} Schema Definition, the design of {NeXML} facilitates the development and deployment of software for processing, transforming, and querying documents. The adoption of {NeXML} for practical use is facilitated by the availability of (1) an online manual with code samples and a reference to all defined elements and attributes, (2) programming toolkits in most of the languages used commonly in evolutionary informatics, and (3) input-output support in several widely used software applications. An active, open, community-based development process enables future revision and expansion of {NeXML}.},
}




@article{liang_expansion_2014,
	title = {Expansion of genotypic diversity and establishment of 2009 {H}1N1 pandemic-origin internal genes in pigs in {China}},
	issn = {0022-538X, 1098-5514},
	url = {http://jvi.asm.org/content/early/2014/07/03/JVI.01327-14},
	doi = {10.1128/JVI.01327-14},
	abstract = {‘Two-way’ transmission of influenza viruses between humans and swine has been frequently observed and the occurrence of the 2009 H1N1 pandemic influenza (pdm/09) demonstrated that swine-origin viruses could facilitate the genesis of a pandemic strain. Although multiple introductions to and reassortment in swine of the pdm/09 virus have been repeatedly reported in both Eurasia and the Americas, its long-term impact on the development of swine influenza viruses (SIVs) has not been systematically explored. Our comprehensive evolutionary studies on the complete genomes of 387 SIVs obtained from 2009 to 2012 in influenza surveillance in China revealed 17 reassortant genotypes with pdm/09-origin genes. Even though the entire 2009 pandemic virus and its surface genes cannot persist, its internal genes have becoming established and are now the predominant lineages in pigs in the region. The main persistent pdm/09-origin reassortant forms had at least 5 pdm/09-origin internal genes and their surface genes primarily of European avian-like (EA) or human H3N2-like SIV origin. These findings represent a marked change to the evolutionary patterns and ecosystem of SIVs in China. It is possible that the pdm/09-origin internal genes may be in the process of replacing EA- or triple reassortant-like internal genes. These alterations to the SIV gene pool need to be continually monitored to assess changes in the potential for SIVs to transmit to humans.
Importance Shortly after the emergence of the 2009 pandemic H1N1 (pdm/09) influenza virus, it was transmitted from humans to pigs and this continues to occur around the world. Many reassortants between pdm/09-origin viruses and enzootic swine influenza viruses (SIVs) have been detected. However, the long-term impact of pdm/09-origin viruses on the SIV gene pool, which could lead to the generation of influenza viruses with the potential to infect humans, has not been systematically examined. From extensive surveillance of SIVs over a 38-month period in southern China, it was found that, although neither complete pdm/09 viruses nor their surface genes could persist in pigs, their internal genes did persist. Over the survey period, these internal genes became predominant, potentially replacing those of the enzootic SIV lineages. The altered diversity of the SIV gene pool needs to be closely monitored for changes in the potential of SIVs to transmit to humans.},
	language = {en},
	urldate = {2017-02-15},
	journal = {Journal of Virology},
	author = {Liang, Huyi and Lam, Tommy Tsan-Yuk and Fan, Xiaohui and Chen, Xinchun and Zeng, Yu and Zhou, Ji and Duan, Lian and Tse, Maying and Chan, Chung-Hei and Li, Lifeng and Leung, Tak-Ying and Yip, Chun-Hung and Cheung, Chung-Lam and Zhou, Boping and Smith, David K. and Poon, Leo Lit-Man and Peiris, Malik and Guan, Yi and Zhu, Huachen},
	month = jul,
	year = {2014},
	pmid = {25008935},
	pages = {JVI.01327-14}
}


@article{hohna_probabilistic_2014,
	title = {Probabilistic Graphical Model Representation in Phylogenetics},
	volume = {63},
	issn = {1063-5157, 1076-836X},
	url = {http://sysbio.oxfordjournals.org/content/63/5/753},
	doi = {10.1093/sysbio/syu039},
	abstract = {Recent years have seen a rapid expansion of the model space explored in statistical phylogenetics, emphasizing the need for new approaches to statistical model representation and software development. Clear communication and representation of the chosen model is crucial for: (i) reproducibility of an analysis, (ii) model development, and (iii) software design. Moreover, a unified, clear and understandable framework for model representation lowers the barrier for beginners and nonspecialists to grasp complex phylogenetic models, including their assumptions and parameter/variable dependencies. Graphical modeling is a unifying framework that has gained in popularity in the statistical literature in recent years. The core idea is to break complex models into conditionally independent distributions. The strength lies in the comprehensibility, flexibility, and adaptability of this formalism, and the large body of computational work based on it. Graphical models are well-suited to teach statistical models, to facilitate communication among phylogeneticists and in the development of generic software for simulation and statistical inference. Here, we provide an introduction to graphical models for phylogeneticists and extend the standard graphical model representation to the realm of phylogenetics. We introduce a new graphical model component, tree plates, to capture the changing structure of the subgraph corresponding to a phylogenetic tree. We describe a range of phylogenetic models using the graphical model framework and introduce modules to simplify the representation of standard components in large and complex models. Phylogenetic model graphs can be readily used in simulation, maximum likelihood inference, and Bayesian inference using, for example, Metropolis–Hastings or Gibbs sampling of the posterior distribution. [Computation; graphical models; inference; modularization; statistical phylogenetics; tree plate.]},
	pages = {753--771},
	number = {5},
	journaltitle = {Systematic Biology},
	shortjournal = {Syst. Biol.},
	author = {Höhna, Sebastian and Heath, Tracy A. and Boussau, Bastien and Landis, Michael J. and Ronquist, Fredrik and Huelsenbeck, John P.},
	urldate = {2015-11-17},
	date = {2014-09-01},
	langid = {english},
	pmid = {24951559}
}

@article{boussau_genome-scale_2013,
	author = {Boussau, Bastien and Szöllősi, Gergely J. and Duret, Laurent and Gouy, Manolo and Tannier, Eric and Daubin, Vincent},
	title = {Genome-scale coestimation of species and gene trees},
	journaltitle = {Genome Research},
	shortjournal = {Genome Res.},
	volume = {23},
	number = {2},
	pages = {323--330},
	date = {2013-02-01},
	issn = {1088-9051, 1549-5469},
	doi = {10.1101/gr.141978.112},
	pmid = {23132911},
	url = {http://genome.cshlp.org/content/23/2/323},
	urldate = {2015-11-17},
	langid = {english},
	abstract = {Comparisons of gene trees and species trees are key to understanding major processes of genome evolution such as gene duplication and loss. Because current methods to reconstruct phylogenies fail to model the two-way dependency between gene trees and the species tree, they often misrepresent gene and species histories. We present a new probabilistic model to jointly infer rooted species and gene trees for dozens of genomes and thousands of gene families. We use simulations to show that this method accurately infers the species tree and gene trees, is robust to misspecification of the models of sequence and gene family evolution, and provides a precise historic record of gene duplications and losses throughout genome evolution. We simultaneously reconstruct the history of mammalian species and their genes based on 36 completely sequenced genomes, and use the reconstructed gene trees to infer the gene content and organization of ancestral mammalian genomes. We show that our method yields a more accurate picture of ancestral genomes than the trees available in the authoritative database Ensembl.}
}

@book{felsenstein_inferring_2003,
	address = {Sunderland, Mass},
	edition = {2},
	title = {Inferring Phylogenies},
	isbn = {9780878931774},
	abstract = {Phylogenies (evolutionary trees) are basic to thinking about and analyzing differences between species. Statistical, computational, and algorithmic work on them has been ongoing for four decades, with great advances in understanding. Yet no book has summarized this work until now. Inferring Phylogenies explains clearly the assumptions and logic of making inferences about phylogenies, and using them to make inferences about evolutionary processes. It is an essential text and reference for anyone who wants to understand how phylogenies are reconstructed and how they are used. As phylogenies are inferred with various kinds of data, this book concentrates on some of the central ones: discretely coded characters, molecular sequences, gene frequencies, and quantitative traits. Also covered are restriction sites, {RAPDs}, and microsatellites. Inferring Phylogenies is intended for graduate-level courses, assuming some knowledge of statistics, mathematics (calculus and fundamental matrix algebra), molecular sequences, and quantitative genetics.},
	language = {English},
	publisher = {Sinauer Associates},
	author = {Felsenstein, Joseph},
	month = sep,
	year = {2003}
}

@book{wickham_ggplot2_2009,
	author = {Wickham, Hadley},
	title = {ggplot2: Elegant Graphics for Data Analysis},
	shorttitle = {ggplot2},
	edition = {1},
	publisher = {Springer},
	month = aug,
	year = {2009},
	isbn = {0387981403}
}

@article{paradis_ape_2004,
	title = {{APE}: Analyses of Phylogenetics and Evolution in {R} language},
	volume = {20},
	shorttitle = {{APE}},
	url = {http://bioinformatics.oxfordjournals.org/content/20/2/289.abstract},
	doi = {10.1093/bioinformatics/btg412},
	abstract = {Summary: Analysis of Phylogenetics and Evolution ({APE}) is a package written in the R language for use in molecular evolution and phylogenetics. {APE} provides both utility functions for reading and writing data and manipulating phylogenetic trees, as well as several advanced methods for phylogenetic and evolutionary analysis (e.g. comparative and population genetic methods). {APE} takes advantage of the many R functions for statistics and graphics, and also provides a flexible framework for developing and implementing further statistical methods for the analysis of evolutionary processes. Availability: The program is free and available from the official R package archive at http://cran.r-project.org/src/contrib/{PACKAGES}.html\#ape. {APE} is licensed under the {GNU} General Public License.},
	number = {2},
	urldate = {2011-03-04},
	journal = {Bioinformatics},
	author = {Paradis, Emmanuel and Claude, Julien and Strimmer, Korbinian},
	month = jan,
	year = {2004},
	pages = {289--290}
}

@article{matsen_pplacer_2010,
	title = {pplacer: linear time maximum-likelihood and {Bayesian} phylogenetic placement of sequences onto a fixed reference tree},
	volume = {11},
	issn = {1471-2105},
	shorttitle = {pplacer},
	url = {http://www.biomedcentral.com/1471-2105/11/538},
	doi = {10.1186/1471-2105-11-538},
	language = {en},
	number = {1},
	urldate = {2015-01-05},
	journal = {{BMC} Bioinformatics},
	author = {Matsen, Frederick A and Kodner, Robin B and Armbrust, E Virginia},
	year = {2010},
	pages = {538}
}

@article{matsen_format_2012,
	author = {Matsen, Frederick A. and Hoffman, Noah G. and Gallagher, Aaron and Stamatakis, Alexandros},
	title = {A Format for Phylogenetic Placements},
	journal = {{PLoS} {ONE}},
	volume = {7},
	number = {2},
	pages = {e31009},
	month = feb,
	year = {2012},
	doi = {10.1371/journal.pone.0031009},
	url = {http://dx.doi.org/10.1371/journal.pone.0031009},
	urldate = {2015-01-05},
	abstract = {We have developed a unified format for phylogenetic placements, that is, mappings of environmental sequence data (e.g., short reads) into a phylogenetic tree. We are motivated to do so by the growing number of tools for computing and post-processing phylogenetic placements, and the lack of an established standard for storing them. The format is lightweight, versatile, extensible, and is based on the {JSON} format, which can be parsed by most modern programming languages. Our format is already implemented in several tools for computing and post-processing parsimony- and likelihood-based phylogenetic placements and has worked well in practice. We believe that establishing a standard format for analyzing read placements at this early stage will lead to a more efficient development of powerful and portable post-analysis tools for the growing applications of phylogenetic placement.}
}


@article{berger_performance_2011,
	title = {Performance, Accuracy, and Web Server for Evolutionary Placement of Short Sequence Reads under Maximum Likelihood},
	volume = {60},
	number = {3},
	issn = {1063-5157, 1076-836X},
	url = {http://sysbio.oxfordjournals.org/content/early/2011/03/23/sysbio.syr010},
	doi = {10.1093/sysbio/syr010},
	abstract = {We present an evolutionary placement algorithm (EPA) and a Web server for the rapid assignment of sequence fragments (short reads) to edges of a given phylogenetic tree under the maximum-likelihood model. The accuracy of the algorithm is evaluated on several real-world data sets and compared with placement by pair-wise sequence comparison, using edit distances and BLAST. We introduce a slow and accurate as well as a fast and less accurate placement algorithm. For the slow algorithm, we develop additional heuristic techniques that yield almost the same run times as the fast version with only a small loss of accuracy. When those additional heuristics are employed, the run time of the more accurate algorithm is comparable with that of a simple BLAST search for data sets with a high number of short query sequences. Moreover, the accuracy of the EPA is significantly higher, in particular when the sample of taxa in the reference topology is sparse or inadequate. Our algorithm, which has been integrated into RAxML, therefore provides an equally fast but more accurate alternative to BLAST for tree-based inference of the evolutionary origin and composition of short sequence reads. We are also actively developing a Web server that offers a freely available service for computing read placements on trees using the EPA. [Maximum likelihood; metagenomics; phylogenetic placement; RAxML; short sequence reads.]},
	language = {en},
	urldate = {2016-11-10},
	journal = {Systematic Biology},
	author = {Berger, Simon A. and Krompass, Denis and Stamatakis, Alexandros},
	month = mar,
	year = {2011},
	pmid = {21436105},
	pages = {291--302}
}


@article{stamatakis_raxml_2014,
	title = {{RAxML} Version 8: A Tool for Phylogenetic Analysis and Post-Analysis of Large Phylogenies},
	volume = {30},
	number = {9},
	issn = {1367-4803, 1460-2059},
	url = {http://bioinformatics.oxfordjournals.org/content/early/2014/01/21/bioinformatics.btu033},
	doi = {10.1093/bioinformatics/btu033},
	shorttitle = {{RAxML} Version 8},
	abstract = {Motivation: Phylogenies are increasingly used in all fields of medical and biological research. Moreover, because of the next generation sequencing revolution, datasets used for conducting phylogenetic analyses grow at an unprecedented pace. {RAxML} (Randomized Axelerated Maximum Likelihood) is a popular program for phylogenetic analyses of large datasets under maximum likelihood. Since the last {RAxML} paper in 2006, it has been continuously maintained and extended to accommodate the increasingly growing input datasets and to serve the needs of the user community.
Results: I present some of the most notable new features and extensions of {RAxML}, such as, a substantial extension of substitution models and supported data types, the introduction of {SSE}3, {AVX}, and {AVX}2 vector intrinsics, techniques for reducing the memory requirements of the code and a plethora of operations for conducting post-analyses on sets of trees. In addition, an up-to-date, 50 page user manual covering all new {RAxML} options is available.
Availability: The code is available under {GNU} {GPL} at https://github.com/stamatak/standard-{RAxML}.
Contact: Alexandros.Stamatakis@h-its.org},
	pages = {1312--1313},
	journaltitle = {Bioinformatics},
	shortjournal = {Bioinformatics},
	author = {Stamatakis, Alexandros},
	urldate = {2015-12-28},
	date = {2014-01-21},
	langid = {english},
	pmid = {24451623}
}

@article{mcmurdie_phyloseq_2013,
	title = {phyloseq: An {R} Package for Reproducible Interactive Analysis and Graphics of Microbiome Census Data},
	volume = {8},
	shorttitle = {phyloseq},
	url = {http://dx.doi.org/10.1371/journal.pone.0061217},
	doi = {10.1371/journal.pone.0061217},
	abstract = {Background: The analysis of microbial communities through {DNA} sequencing brings many challenges: the integration of different types of data with methods from ecology, genetics, phylogenetics, multivariate statistics, visualization and testing. With the increased breadth of experimental designs now being pursued, project-specific statistical analyses are often needed, and these analyses are often difficult (or impossible) for peer researchers to independently reproduce. The vast majority of the requisite tools for performing these analyses reproducibly are already implemented in R and its extensions (packages), but with limited support for high throughput microbiome census data. Results: Here we describe a software project, phyloseq, dedicated to the object-oriented representation and analysis of microbiome census data in R. It supports importing data from a variety of common formats, as well as many analysis techniques. These include calibration, filtering, subsetting, agglomeration, multi-table comparisons, diversity analysis, parallelized Fast {UniFrac}, ordination methods, and production of publication-quality graphics; all in a manner that is easy to document, share, and modify. We show how to apply functions from other R packages to phyloseq-represented data, illustrating the availability of a large number of open source analysis techniques. We discuss the use of phyloseq with tools for reproducible research, a practice common in other fields but still rare in the analysis of highly parallel microbiome census data. We have made available all of the materials necessary to completely reproduce the analysis and figures included in this article, an example of best practices for reproducible research. Conclusions: The phyloseq project for R is a new open-source software package, freely available on the web from both {GitHub} and Bioconductor.},
	number = {4},
	urldate = {2015-01-05},
	journal = {{PLoS} {ONE}},
	author = {McMurdie, Paul J. and Holmes, Susan},
	month = apr,
	year = {2013},
	pages = {e61217}
}

@article{marazzi_locating_2012,
	title = {Locating Evolutionary Precursors on a Phylogenetic Tree},
	volume = {66},
	rights = {© 2012 The Author(s). Evolution © 2012 The Society for the Study of Evolution.},
	issn = {1558-5646},
	url = {http://onlinelibrary.wiley.com/doi/10.1111/j.1558-5646.2012.01720.x/abstract},
	doi = {10.1111/j.1558-5646.2012.01720.x},
	abstract = {Conspicuous innovations in the history of life are often preceded by more cryptic genetic and developmental precursors. In many cases, these appear to be associated with recurring origins of very similar traits in close relatives (parallelisms) or striking convergences separated by deep time (deep homologies). Although the phylogenetic distribution of gain and loss of traits hints strongly at the existence of such precursors, no models of trait evolution currently permit inference about their location on a tree. Here we develop a new stochastic model, which explicitly captures the dependency implied by a precursor and permits estimation of precursor locations. We apply it to the evolution of extrafloral nectaries ({EFNs}), an ecologically significant trait mediating a widespread mutualism between plants and ants. In legumes, a species-rich clade with morphologically diverse {EFNs}, the precursor model fits the data on {EFN} occurrences significantly better than conventional models. The model generates explicit hypotheses about the phylogenetic location of hypothetical precursors, which may help guide future studies of molecular genetic pathways underlying nectary position, development, and function.},
	pages = {3918--3930},
	number = {12},
	journaltitle = {Evolution},
	author = {Marazzi, Brigitte and Ané, Cécile and Simon, Marcelo F. and Delgado-Salinas, Alfonso and Luckow, Melissa and Sanderson, Michael J.},
	urldate = {2015-12-28},
	date = {2012-12-01},
	langid = {english},
	keywords = {Deep homology, extra-floral nectary, homoplasy, trait evolution}
}

@article{yang_paml_2007,
	title = {{PAML} 4: Phylogenetic Analysis by Maximum Likelihood},
	volume = {24},
	issn = {0737-4038, 1537-1719},
	shorttitle = {{PAML} 4},
	url = {http://mbe.oxfordjournals.org/content/24/8/1586},
	doi = {10.1093/molbev/msm088},
	abstract = {{PAML}, currently in version 4, is a package of programs for phylogenetic analyses of {DNA} and protein sequences using maximum likelihood ({ML}). The programs may be used to compare and test phylogenetic trees, but their main strengths lie in the rich repertoire of evolutionary models implemented, which can be used to estimate parameters in models of sequence evolution and to test interesting biological hypotheses. Uses of the programs include estimation of synonymous and nonsynonymous rates ({dN} and {dS}) between two protein-coding {DNA} sequences, inference of positive Darwinian selection through phylogenetic comparison of protein-coding genes, reconstruction of ancestral genes and proteins for molecular restoration studies of extinct life forms, combined analysis of heterogeneous data sets from multiple gene loci, and estimation of species divergence times incorporating uncertainties in fossil calibrations. This note discusses some of the major applications of the package, which includes example data sets to demonstrate their use. The package is written in {ANSI} C, and runs under Windows, Mac {OSX}, and {UNIX} systems. It is available at http://abacus.gene.ucl.ac.uk/software/paml.html.},
	language = {en},
	number = {8},
	urldate = {2015-01-05},
	journal = {Molecular Biology and Evolution},
	author = {Yang, Ziheng},
	month = aug,
	year = {2007},
	pmid = {17483113},
	keywords = {codon models, likelihood, {PAML}, phylogenetic analysis, Software},
	pages = {1586--1591}
}

@article{pond_hyphy_2005,
	title = {{HyPhy}: hypothesis testing using phylogenies},
	volume = {21},
	issn = {1367-4803, 1460-2059},
	shorttitle = {{HyPhy}},
	url = {http://bioinformatics.oxfordjournals.org/content/21/5/676},
	doi = {10.1093/bioinformatics/bti079},
	abstract = {Summary: The {HyPhy} package is designed to provide a flexible and unified platform for carrying out likelihood-based analyses on multiple alignments of molecular sequence data, with the emphasis on studies of rates and patterns of sequence evolution.
Availability: http://www.hyphy.org
Contact: muse@stat.ncsu.edu
Supplementary information: {HyPhy} documentation and tutorials are available at http://www.hyphy.org},
	language = {en},
	number = {5},
	urldate = {2015-01-05},
	journal = {Bioinformatics},
	author = {Kosakovsky Pond, Sergei L. and Frost, Simon D. W. and Muse, Spencer V.},
	month = mar,
	year = {2005},
	pmid = {15509596},
	pages = {676--679}
}

@article{bouckaert_beast_2014,
	title = {{BEAST} 2: A Software Platform for {Bayesian} Evolutionary Analysis},
	volume = {10},
	shorttitle = {{BEAST} 2},
	url = {http://dx.doi.org/10.1371/journal.pcbi.1003537},
	doi = {10.1371/journal.pcbi.1003537},
	abstract = {We present a new open source, extensible and flexible software platform for Bayesian evolutionary analysis called {BEAST} 2. This software platform is a re-design of the popular {BEAST} 1 platform to correct structural deficiencies that became evident as the {BEAST} 1 software evolved. Key among those deficiencies was the lack of post-deployment extensibility. {BEAST} 2 now has a fully developed package management system that allows third party developers to write additional functionality that can be directly installed to the {BEAST} 2 analysis platform via a package manager without requiring a new software release of the platform. This package architecture is showcased with a number of recently published new models encompassing birth-death-sampling tree priors, phylodynamics and model averaging for substitution models and site partitioning. A second major improvement is the ability to read/write the entire state of the {MCMC} chain to/from disk allowing it to be easily shared between multiple instances of the {BEAST} software. This facilitates checkpointing and better support for multi-processor and high-end computing extensions. Finally, the functionality in new packages can be easily added to the user interface ({BEAUti} 2) by a simple {XML} template-based mechanism because {BEAST} 2 has been re-designed to provide greater integration between the analysis engine and the user interface so that, for example {BEAST} and {BEAUti} use exactly the same {XML} file format.},
	number = {4},
	urldate = {2015-01-05},
	journal = {{PLoS} Computational Biology},
	author = {Bouckaert, Remco and Heled, Joseph and Kühnert, Denise and Vaughan, Tim and Wu, Chieh-Hsi and Xie, Dong and Suchard, Marc A. and Rambaut, Andrew and Drummond, Alexei J.},
	month = apr,
	year = {2014},
	pages = {e1003537}
}


@manual{rstats,
	author = {{R Core Team}},
	title = {R: A Language and Environment for Statistical Computing},
	organization = {R Foundation for Statistical Computing},
	address = {Vienna, Austria},
	year = {2016},
	url = {https://www.R-project.org/}
}


@article{koski_closest_2001,
	author = {Koski, L. B. and Golding, G. B.},
	title = {The closest {BLAST} hit is often not the nearest neighbor},
	journal = {Journal of Molecular Evolution},
	volume = {52},
	number = {6},
	pages = {540--542},
	month = jun,
	year = {2001},
	issn = {0022-2844},
	doi = {10.1007/s002390010184},
	pmid = {11443357},
	language = {eng},
	keywords = {Algorithms, Crenarchaeota, Databases, Factual, Escherichia coli, Open Reading Frames, Phylogeny, Salmonella typhimurium, Software},
	abstract = {It is well known that basing phylogenetic reconstructions on uncorrected genetic distances can lead to errors in their reconstruction. Nevertheless, it is often common practice to report simply the most similar BLAST (Altschul et al. 1997) hit in genomic reports that discuss many genes (Ruepp et al. 2000; Freiberg et al. 1997). This is because BLAST hits can provide a rapid, efficient, and concise analysis of many genes at once. These hits are often interpreted to imply that the gene is most closely related to the gene or protein in the databases that returned the closest BLAST hit. Though these two may coincide, for many genes, particularly genes with few homologs, they may not be the same. There are a number of circumstances that can account for such limitations in accuracy (Eisen 2000). We stress here that genes appearing to be the most similar based on BLAST hits are often not each others closest relative phylogenetically. The extent to which this occurs depends on the availability of close relatives present in the databases. As an example we have chosen the analysis of the genomes of a crenarcheaota species Aeropyrum pernix, an organism with few close relatives fully sequenced, and Escherichia coli, an organism whose closest relative, Salmonella typhimurium, is completely sequenced.}
}

@article{lemey_reconstructing_2009,
	author = {Lemey, Philippe and Suchard, Marc and Rambaut, Andrew},
	title = {Reconstructing the initial global spread of a human influenza pandemic},
	journal = {PLoS Currents},
	volume = {1},
	month = sep,
	year = {2009},
	issn = {2157-3999},
	doi = {10.1371/currents.RRN1031},
	pmid = {20029613},
	pmcid = {PMC2762761},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2762761/},
	urldate = {2017-06-07},
	abstract = {Here, we present an analysis of the H1N1pdm genetic data sampled over the initial stages in the epidemic. To infer phylodynamic spread in time and space we employ a recently developed Bayesian statistical inference framework (Lemey et al., in press). We model spatial diffusion as a continuous-time Markov chain process along time-measured genealogies. In this analysis, we consider 40 locations for which sequence data were available on 06-Aug-2009. The sampling time interval of the 242 sequences spans from 30-Mar-2009 to 12-Jul-2009. The Bayesian inference typically results in a posterior distribution of phylogenetic trees, each having an estimate of the epidemic locations at the ancestral nodes in the tree. We summarize these trees using the most representative clustering pattern and annotate these clusters with the most probable location states. We can visualize this information as tree that grows over time, seeding locations each time an ancestral node is inferred to exist at a different location. A Bayes factor test provides statistical support for epidemiological linkage throughout the evolutionary history. We demonstrate how our full probabilistic approach efficiently tracks an epidemic based on viral genetic data as it unfolds across the globe.}
}

@article{delviks-frankenberry_mechanisms_2011,
	title = {Mechanisms and Factors that Influence High Frequency Retroviral Recombination},
	volume = {3},
	copyright = {http://creativecommons.org/licenses/by/3.0/},
	url = {http://www.mdpi.com/1999-4915/3/9/1650},
	doi = {10.3390/v3091650},
	abstract = {With constantly changing environmental selection pressures, retroviruses rely upon recombination to reassort polymorphisms in their genomes and increase genetic diversity, which improves the chances for the survival of their population. Recombination occurs during DNA synthesis, whereby reverse transcriptase undergoes template switching events between the two copackaged RNAs, resulting in a viral recombinant with portions of the genetic information from each parental RNA. This review summarizes our current understanding of the factors and mechanisms influencing retroviral recombination, fidelity of the recombination process, and evaluates the subsequent viral diversity and fitness of the progeny recombinant. Specifically, the high mutation rates and high recombination frequencies of HIV-1 will be analyzed for their roles in influencing HIV-1 global diversity, as well as HIV-1 diagnosis, drug treatment, and vaccine development.},
	language = {en},
	number = {9},
	urldate = {2017-06-07},
	journal = {Viruses},
	author = {Delviks-Frankenberry, Krista and Galli, Andrea and Nikolaitchik, Olga and Mens, Helene and Pathak, Vinay K. and Hu, Wei-Shau},
	month = sep,
	year = {2011},
	keywords = {HIV-1, recombination, retrovirus},
	pages = {1650--1680}
}


@article{huang_bat-derived_2016,
	title = {A {Bat}-{Derived} {Putative} {Cross}-{Family} {Recombinant} {Coronavirus} with a {Reovirus} {Gene}},
	volume = {12},
	issn = {1553-7374},
	doi = {10.1371/journal.ppat.1005883},
	abstract = {The emergence of severe acute respiratory syndrome coronavirus (SARS-CoV) in 2002 and Middle East respiratory syndrome coronavirus (MERS-CoV) in 2012 has generated enormous interest in the biodiversity, genomics and cross-species transmission potential of coronaviruses, especially those from bats, the second most speciose order of mammals. Herein, we identified a novel coronavirus, provisionally designated Rousettus bat coronavirus GCCDC1 (Ro-BatCoV GCCDC1), in the rectal swab samples of Rousettus leschenaulti bats by using pan-coronavirus RT-PCR and next-generation sequencing. Although the virus is similar to Rousettus bat coronavirus HKU9 (Ro-BatCoV HKU9) in genome characteristics, it is sufficiently distinct to be classified as a new species according to the criteria defined by the International Committee of Taxonomy of Viruses (ICTV). More striking was that Ro-BatCoV GCCDC1 contained a unique gene integrated into the 3'-end of the genome that has no homologs in any known coronavirus, but which sequence and phylogeny analyses indicated most likely originated from the p10 gene of a bat orthoreovirus. Subgenomic mRNA and cellular-level observations demonstrated that the p10 gene is functional and induces the formation of cell syncytia. Therefore, here we report a putative heterologous inter-family recombination event between a single-stranded, positive-sense RNA virus and a double-stranded segmented RNA virus, providing insights into the fundamental mechanisms of viral evolution.},
	language = {eng},
	number = {9},
	journal = {PLOS Pathogens},
	author = {Huang, Canping and Liu, William J. and Xu, Wen and Jin, Tao and Zhao, Yingze and Song, Jingdong and Shi, Yi and Ji, Wei and Jia, Hao and Zhou, Yongming and Wen, Honghua and Zhao, Honglan and Liu, Huaxing and Li, Hong and Wang, Qihui and Wu, Ying and Wang, Liang and Liu, Di and Liu, Guang and Yu, Hongjie and Holmes, Edward C. and Lu, Lin and Gao, George F.},
	month = sep,
	year = {2016},
	pmid = {27676249},
	pmcid = {PMC5038965},
	pages = {e1005883}
}

@article{he_intragenic_2010,
	title = {Intragenic {Recombination} as a {Mechanism} of {Genetic} {Diversity} in {Bluetongue} {Virus}},
	volume = {84},
	issn = {0022-538X, 1098-5514},
	url = {http://jvi.asm.org/content/84/21/11487},
	doi = {10.1128/JVI.00889-10},
	abstract = {Bluetongue (BT), caused by Bluetongue virus (BTV), is an economically important disease affecting sheep, deer, cattle, and goats. Since 1998, a series of BT outbreaks have spread across much of southern and central Europe. To study why the epidemiology of the virus happens to change, it is important to fully know the mechanisms resulting in its genetic diversity. Gene mutation and segment reassortment have been considered as the key forces driving the evolution of BTV. However, it is still unknown whether intragenic recombination can occur and contribute to the process in the virus. We present here several BTV groups containing mosaic genes to reveal that intragenic recombination can take place between the virus strains and play a potential role in bringing novel BTV lineages.},
	language = {en},
	number = {21},
	urldate = {2017-06-07},
	journal = {Journal of Virology},
	author = {He, Cheng-Qiang and Ding, Nai-Zheng and He, Mei and Li, Shan-Ni and Wang, Xing-Ming and He, Hong-Bin and Liu, Xin-Fa and Guo, Hong-Shan},
	month = nov,
	year = {2010},
	pmid = {20702614},
	pages = {11487--11495},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/5H4JQA5P/He et al. - 2010 - Intragenic Recombination as a Mechanism of Genetic.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/NWMDKHNS/11487.html:text/html}
}

@article{steinhauer_lack_1992,
	title = {Lack of evidence for proofreading mechanisms associated with an {RNA} virus polymerase},
	volume = {122},
	issn = {0378-1119},
	abstract = {The in vitro fidelity of the virion-associated RNA polymerase of vesicular stomatitis virus was quantitated for a single conserved viral RNA site and the usual high in vitro base misincorporation error frequencies (approx. $10^{-3}$) were observed at this (guanine) site. We sought evidence for RNA 3'$\rightarrow$5' exonuclease proofreading mechanisms by varying the concentrations of the next nucleoside triphosphate, by incorporation of nucleoside[1-thio]triphosphate analogues of the four natural RNA nucleosides, and by varying the concentrations of pyrophosphate in the in vitro polymerase reaction. None of these perturbations greatly affected viral RNA polymerase fidelity at the site studied. These results fail to show evidence for proofreading exonuclease activity associated with the virion replicase of an RNA virus. They suggest that RNA virus replication might generally be error-prone, because RNA replicase base misincorporations are proofread very inefficiently or not at all.},
	language = {eng},
	number = {2},
	journal = {Gene},
	author = {Steinhauer, D. A. and Domingo, E. and Holland, J. J.},
	month = dec,
	year = {1992},
	pmid = {1336756},
	keywords = {Base Sequence, DNA-Directed RNA Polymerases, Electrophoresis, Polyacrylamide Gel, Molecular Sequence Data, RNA, Messenger, RNA, Viral, Vesicular stomatitis Indiana virus},
	pages = {281--288}
}

@article{dobzhansky_nothing_1973,
	author = {Dobzhansky, Theodosius},
	title = {Nothing in {Biology} {Makes} {Sense} except in the {Light} of {Evolution}},
	journal = {The American Biology Teacher},
	year = {1973},
	month = mar,
	volume = {35},
	number = {3},
	pages = {125--129},
	issn = {0002-7685, 1938-4211},
	doi = {10.2307/4444260},
	url = {http://abt.ucpress.edu/content/35/3/125},
	urldate = {2017-06-07},
	language = {en},
	copyright = {Copyright 1973 National Association of Biology Teachers},
	file = {Snapshot:/Volumes/HOME/Zotero/storage/KE3THCIS/125.html:text/html}
}

@article{salamin_building_2002,
	title = {Building {Supertrees}: {An} {Empirical} {Assessment} {Using} the {Grass} {Family} ({Poaceae})},
	volume = {51},
	issn = {1063-5157},
	shorttitle = {Building {Supertrees}},
	url = {https://academic.oup.com/sysbio/article/51/1/136/1631315/Building-Supertrees-An-Empirical-Assessment-Using},
	doi = {10.1080/106351502753475916},
	number = {1},
	urldate = {2017-05-22},
	journal = {Systematic Biology},
	author = {Salamin, Nicolas and Hodkinson, Trevor R. and Savolainen, Vincent},
	month = jan,
	year = {2002},
	pages = {136--150},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/96WRQSHP/Salamin et al. - 2002 - Building Supertrees An Empirical Assessment Using.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/V5MWGXB3/Building-Supertrees-An-Empirical-Assessment-Using.html:text/html}
}

@article{semple_supertree_2000,
	title = {A supertree method for rooted trees},
	volume = {105},
	issn = {0166-218X},
	url = {http://www.sciencedirect.com/science/article/pii/S0166218X0000202X},
	doi = {10.1016/S0166-218X(00)00202-X},
	abstract = {The amalgamation of leaf-labelled (phylogenetic) trees on overlapping leaf sets into one (super)tree is a central problem in several areas of classification, particularly evolutionary biology. In this paper, we describe a new technique for amalgamating rooted phylogenetic trees. This appears to be the first such method to provably exhibit particular desirable properties which we list and establish.},
	number = {1--3},
	urldate = {2017-05-22},
	journal = {Discrete Applied Mathematics},
	author = {Semple, Charles and Steel, Mike},
	month = oct,
	year = {2000},
	keywords = {Consensus, Rooted phylogenetic tree, Supertree},
	pages = {147--158},
	file = {ScienceDirect Full Text PDF:/Volumes/HOME/Zotero/storage/U5BNWITA/Semple and Steel - 2000 - A supertree method for rooted trees.pdf:application/pdf;ScienceDirect Snapshot:/Volumes/HOME/Zotero/storage/WCZIDADG/S0166218X0000202X.html:text/html}
}

@article{eulenstein_performance_2004,
	author = {Eulenstein, Oliver and Chen, Duhong and Burleigh, J. Gordon and Fernández-Baca, David and Sanderson, Michael J.},
	title = {Performance of flip supertree construction with a heuristic algorithm},
	journal = {Systematic Biology},
	year = {2004},
	month = apr,
	volume = {53},
	number = {2},
	pages = {299--308},
	issn = {1063-5157},
	doi = {10.1080/10635150490423719},
	pmid = {15205054},
	language = {eng},
	keywords = {Algorithms, Classification, Computer Simulation, Phylogeny},
	abstract = {Supertree methods are used to assemble separate phylogenetic trees with shared taxa into larger trees (supertrees) in an effort to construct more comprehensive phylogenetic hypotheses. In spite of much recent interest in supertrees, there are still few methods for supertree construction. The flip supertree problem is an error correction approach that seeks to find a minimum number of changes (flips) to the matrix representation of the set of input trees to resolve their incompatibilities. A previous flip supertree algorithm was limited to finding exact solutions and was only feasible for small input trees. We developed a heuristic algorithm for the flip supertree problem suitable for much larger input trees. We used a series of 48- and 96-taxon simulations to compare supertrees constructed with the flip supertree heuristic algorithm with supertrees constructed using other approaches, including MinCut (MC), modified MC (MMC), and matrix representation with parsimony (MRP). Flip supertrees are generally far more accurate than supertrees constructed using MC or MMC algorithms and are at least as accurate as supertrees built with MRP. The flip supertree method is therefore a viable alternative to other supertree methods when the number of taxa is large.}
}


@article{friedrich_profdist:_2005,
	title = {{ProfDist}: a tool for the construction of large phylogenetic trees based on profile distances},
	volume = {21},
	issn = {1367-4803},
	shorttitle = {{ProfDist}},
	doi = {10.1093/bioinformatics/bti289},
	abstract = {SUMMARY: ProfDist is a user-friendly software package using the profile-neighbor-joining method (PNJ) in inferring phylogenies based on profile distances on DNA or RNA sequences. It is a tool for reconstructing and visualizing large phylogenetic trees providing new and standard features with a special focus on time efficency, robustness and accuracy.
AVAILABILITY: A Windows version of ProfDist comes with a graphical user interface and is freely available at http://profdist.bioapps.biozentrum.uni-wuerzburg.de},
	language = {eng},
	number = {9},
	journal = {Bioinformatics},
	author = {Friedrich, Joachim and Dandekar, Thomas and Wolf, Matthias and Müller, Tobias},
	month = may,
	year = {2005},
	pmid = {15677706},
	keywords = {Algorithms, Chromosome Mapping, DNA Mutational Analysis, Gene Expression Profiling, Linkage Disequilibrium, Phylogeny, Sequence Alignment, Sequence Analysis, DNA, Software, User-Computer Interface},
	pages = {2108--2109}
}


@article{nelson_spatial_2011,
	title = {Spatial {Dynamics} of {Human}-{Origin} {H1} {Influenza} {A} {Virus} in {North} {American} {Swine}},
	volume = {7},
	issn = {1553-7374},
	url = {http://journals.plos.org/plospathogens/article?id=10.1371/journal.ppat.1002077},
	doi = {10.1371/journal.ppat.1002077},
	abstract = {Author Summary Since 1998, genetically and antigenically diverse influenza A viruses have circulated in North American swine due to continuous cross-species transmission and reassortment with avian and human influenza viruses, presenting a pandemic threat to humans. Millions of swine are transported year-round from the southern United States into the corn-rich Midwest, but the importance of these movements in the spatial dissemination and evolution of the influenza virus in swine is unknown. Using a large data set of influenza virus sequences collected in North American swine during 2003–2010, we investigated the spatial dynamics of two influenza viruses of the H1 subtype that were introduced into swine from humans around 2003. Employing recently developed Bayesian phylogeography methods, we find that the spread of this influenza virus follows the large-scale transport of swine from the South to the Midwest. Based on this pattern of viral migration, we suggest that the genetic diversity of swine influenza viruses in the Midwest is continually augmented by the importation of viruses from source populations located in the South. Understanding the importance of long-distance pig movements in the evolution and spatial dissemination of influenza virus in swine may inform future strategies for the surveillance and control of influenza, and perhaps other swine pathogens.},
	number = {6},
	urldate = {2017-05-22},
	journal = {PLOS Pathogens},
	author = {Nelson, Martha I. and Lemey, Philippe and Tan, Yi and Vincent, Amy and Lam, Tommy Tsan-Yuk and Detmer, Susan and Viboud, Cécile and Suchard, Marc A. and Rambaut, Andrew and Holmes, Edward C. and Gramer, Marie},
	month = jun,
	year = {2011},
	keywords = {H1N1, Influenza A virus, Influenza viruses, phylogenetic analysis, phylogenetics, Phylogeography, Swine, Swine influenza},
	pages = {e1002077},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/JDR2JP5E/Nelson et al. - 2011 - Spatial Dynamics of Human-Origin H1 Influenza A Vi.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/3SDAAC5F/article.html:text/html}
}


@article{colijn_phylogenetic_2014,
	title = {Phylogenetic tree shapes resolve disease transmission patterns},
	volume = {2014},
	issn = {2050-6201},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4097963/},
	doi = {10.1093/emph/eou018},
	abstract = {The shapes of phylogenies of pathogens can reveal patterns in how an outbreak spreads. We used simple features to summarise the shapes of pathogen phylogenies. This provided enough information to distinguish outbreaks with super-spreaders, outbreaks spreading homogeneously, and those with chains of transmission. Background and Objectives: Whole-genome sequencing is becoming popular as a tool for understanding outbreaks of communicable diseases, with phylogenetic trees being used to identify individual transmission events or to characterize outbreak-level overall transmission dynamics. Existing methods to infer transmission dynamics from sequence data rely on well-characterized infectious periods, epidemiological and clinical metadata which may not always be available, and typically require computationally intensive analysis focusing on the branch lengths in phylogenetic trees. We sought to determine whether the topological structures of phylogenetic trees contain signatures of the transmission patterns underlying an outbreak. Methodology: We use simulated outbreaks to train and then test computational classifiers. We test the method on data from two real-world outbreaks. Results: We show that different transmission patterns result in quantitatively different phylogenetic tree shapes. We describe topological features that summarize a phylogeny’s structure and find that computational classifiers based on these are capable of predicting an outbreak’s transmission dynamics. The method is robust to variations in the transmission parameters and network types, and recapitulates known epidemiology of previously characterized real-world outbreaks. Conclusions and implications: There are simple structural properties of phylogenetic trees which, when combined, can distinguish communicable disease outbreaks with a super-spreader, homogeneous transmission and chains of transmission. This is possible using genome data alone, and can be done during an outbreak. We discuss the implications for management of outbreaks.},
	number = {1},
	urldate = {2017-05-22},
	journal = {Evolution, Medicine, and Public Health},
	author = {Colijn, Caroline and Gardy, Jennifer},
	month = jun,
	year = {2014},
	pmid = {24916411},
	pmcid = {PMC4097963},
	pages = {96--108},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/PPPERVV6/Colijn and Gardy - 2014 - Phylogenetic tree shapes resolve disease transmiss.pdf:application/pdf}
}

@article{volz_inferring_2013,
	author = {Volz, Erik M. and Frost, Simon D. W.},
	title = {Inferring the {Source} of {Transmission} with {Phylogenetic} {Data}},
	journal = {PLOS Computational Biology},
	year = {2013},
	month = dec,
	volume = {9},
	number = {12},
	pages = {e1003397},
	issn = {1553-7358},
	doi = {10.1371/journal.pcbi.1003397},
	url = {http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003397},
	urldate = {2017-05-22},
	keywords = {Cherries, HIV, HIV epidemiology, Pathogens, phylogenetic analysis, phylogenetics, Sequence Alignment, Simulation and modeling},
	abstract = {Author Summary Molecular data from pathogens may be useful for identifying the source of infection and identifying pairs of individuals such that one host transmitted to the other. Inference of who acquired infection from whom is confounded by incomplete sampling, and given genetic data only, it is not possible to infer the direction of transmission in a transmission pair. Given additional information about an infectious disease epidemic, such as incidence of infection over time, and the proportion of hosts sampled, it is possible to correct for biases stemming from incomplete sampling of the infected host population. It may even be possible to infer the direction of transmission within a transmission pair if additional clinical, behavioral, and demographic covariates of the infected hosts are available. We consider the problem of identifying the source of infection using HIV sequence data collected for clinical purposes. We find that it is rarely possible to infer transmission pairs with high credibility, but such data may nevertheless be useful for epidemiological investigations and identifying risk factors for transmission.},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/96J943UD/Volz and Frost - 2013 - Inferring the Source of Transmission with Phylogen.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/WJUXVRPA/article.html:text/html}
}


@article{lam_use_2010,
	author = {Lam, Tommy Tsan-Yuk and Hon, Chung-Chau and Tang, Julian W.},
	title = {Use of phylogenetics in the molecular epidemiology and evolutionary studies of viral infections},
	journal = {Critical Reviews in Clinical Laboratory Sciences},
	year = {2010},
	month = feb,
	volume = {47},
	number = {1},
	pages = {5--49},
	issn = {1549-781X},
	doi = {10.3109/10408361003633318},
	pmid = {20367503},
	language = {eng},
	keywords = {Animals, Evolution, Molecular, Host-Pathogen Interactions, Humans, Molecular Epidemiology, Phylogeny, Virus Diseases, Viruses},
	abstract = {Since DNA sequencing techniques first became available almost 30 years ago, the amount of nucleic acid sequence data has increased enormously. Phylogenetics, which is widely applied to compare and analyze such data, is particularly useful for the analysis of genes from rapidly evolving viruses. It has been used extensively to describe the molecular epidemiology and transmission of the human immunodeficiency virus (HIV), the origins and subsequent evolution of the severe acute respiratory syndrome (SARS)-associated coronavirus (SCoV), and, more recently, the evolving epidemiology of avian influenza as well as seasonal and pandemic human influenza viruses. Recent advances in phylogenetic methods can infer more in-depth information about the patterns of virus emergence, adding to the conventional approaches in viral epidemiology. Examples of this information include estimations (with confidence limits) of the actual time of the origin of a new viral strain or its emergence in a new species, viral recombination and reassortment events, the rate of population size change in a viral epidemic, and how the virus spreads and evolves within a specific population and geographical region. Such sequence-derived information obtained from the phylogenetic tree can assist in the design and implementation of public health and therapeutic interventions. However, application of many of these advanced phylogenetic methods are currently limited to specialized phylogeneticists and statisticians, mainly because of their mathematical basis and their dependence on the use of a large number of computer programs. This review attempts to bridge this gap by presenting conceptual, technical, and practical aspects of applying phylogenetic methods in studies of influenza, HIV, and SCoV. It aims to provide, with minimal mathematics and statistics, a practical overview of how phylogenetic methods can be incorporated into virological studies by clinical and laboratory specialists.}
}

@article{parrish_influenza_2015,
	title = {Influenza {Virus} {Reservoirs} and {Intermediate} {Hosts}: {Dogs}, {Horses}, and {New} {Possibilities} for {Influenza} {Virus} {Exposure} of {Humans}},
	volume = {89},
	issn = {0022-538X, 1098-5514},
	shorttitle = {Influenza {Virus} {Reservoirs} and {Intermediate} {Hosts}},
	url = {http://jvi.asm.org/content/89/6/2990},
	doi = {10.1128/JVI.03146-14},
	abstract = {Influenza A virus (IAV) infections in hosts outside the main aquatic bird reservoirs occur periodically. Although most such cross-species transmission events result in limited onward transmission in the new host, sustained influenza outbreaks have occurred in poultry and in a number of mammalian species, including humans, pigs, horses, seals, and mink. Recently, two distinct strains of IAV have emerged in domestic dogs, with each circulating widely for several years. Here, we briefly outline what is known about the role of intermediate hosts in influenza emergence, summarize our knowledge of the new canine influenza viruses (CIVs) and how they provide key new information on the process of host adaptation, and assess the risk these viruses pose to human populations.},
	language = {en},
	number = {6},
	urldate = {2017-05-19},
	journal = {Journal of Virology},
	author = {Parrish, Colin R. and Murcia, Pablo R. and Holmes, Edward C.},
	month = mar,
	year = {2015},
	pmid = {25540375},
	pages = {2990--2994},
	file = {Snapshot:/Volumes/HOME/Zotero/storage/GVPIB386/2990.html:text/html}
}


@article{price_fasttree_2010,
	author = {Price, Morgan N. and Dehal, Paramvir S. and Arkin, Adam P.},
	title = {{FastTree} 2 – {Approximately} {Maximum}-{Likelihood} {Trees} for {Large} {Alignments}},
	journal = {PLOS ONE},
	year = {2010},
	month = mar,
	volume = {5},
	number = {3},
	pages = {e9490},
	issn = {1932-6203},
	doi = {10.1371/journal.pone.0009490},
	url = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0009490},
	urldate = {2017-04-30},
	keywords = {Biochemical simulations, Biophysical simulations, Multiple alignment calculation, Optimization, phylogenetic analysis, Protein structure comparison, Ribosomal RNA, Sequence Alignment},
	abstract = {Background We recently described FastTree, a tool for inferring phylogenies for alignments with up to hundreds of thousands of sequences. Here, we describe improvements to FastTree that improve its accuracy without sacrificing scalability.  Methodology/Principal Findings Where FastTree 1 used nearest-neighbor interchanges (NNIs) and the minimum-evolution criterion to improve the tree, FastTree 2 adds minimum-evolution subtree-pruning-regrafting (SPRs) and maximum-likelihood NNIs. FastTree 2 uses heuristics to restrict the search for better trees and estimates a rate of evolution for each site (the “CAT” approximation). Nevertheless, for both simulated and genuine alignments, FastTree 2 is slightly more accurate than a standard implementation of maximum-likelihood NNIs (PhyML 3 with default settings). Although FastTree 2 is not quite as accurate as methods that use maximum-likelihood SPRs, most of the splits that disagree are poorly supported, and for large alignments, FastTree 2 is 100–1,000 times faster. FastTree 2 inferred a topology and likelihood-based local support values for 237,882 distinct 16S ribosomal RNAs on a desktop computer in 22 hours and 5.8 gigabytes of memory.  Conclusions/Significance FastTree 2 allows the inference of maximum-likelihood phylogenies for huge alignments. FastTree 2 is freely available at http://www.microbesonline.org/fasttree.},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/SEMG6Z4W/Price et al. - 2010 - FastTree 2 – Approximately Maximum-Likelihood Tree.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/FV7U2S33/article.html:text/html}
}


@article{yin_ggbio:_2012,
	title = {{ggbio}: an {R} package for extending the grammar of graphics for genomic data},
	volume = {13},
	copyright = {2012 Yin et al.; licensee BioMed Central Ltd.},
	issn = {1474-760X},
	shorttitle = {ggbio},
	url = {https://genomebiology.biomedcentral.com/articles/10.1186/gb-2012-13-8-r77},
	doi = {10.1186/gb-2012-13-8-r77},
	abstract = {We introduce ggbio, a new methodology to visualize and explore genomics annotations and high-throughput data. The plots provide detailed views of genomic regions, summary views of sequence alignments and splicing patterns, and genome-wide overviews with karyogram, circular and grand linear layouts. The methods leverage the statistical functionality available in R, the grammar of graphics and the data handling capabilities of the Bioconductor project. The plots are specified within a modular framework that enables users to construct plots in a systematic way, and are generated directly from Bioconductor data structures. The ggbio R package is available at http://www.bioconductor.org/packages/2.11/bioc/html/ggbio.html.},
	language = {en},
	number = {8},
	urldate = {2017-05-16},
	journal = {Genome Biology},
	author = {Yin, Tengfei and Cook, Dianne and Lawrence, Michael},
	month = aug,
	year = {2012},
	pages = {R77},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/BAUVSJM5/Yin et al. - 2012 - ggbio an R package for extending the grammar of g.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/6B54QURX/gb-2012-13-8-r77.html:text/html}
}

@article{chevenet_treedyn:_2006,
	title = {{TreeDyn}: towards dynamic graphics and annotations for analyses of trees},
	volume = {7},
	issn = {1471-2105},
	shorttitle = {{TreeDyn}},
	doi = {10.1186/1471-2105-7-439},
	abstract = {BACKGROUND: Analyses of biomolecules for biodiversity, phylogeny or structure/function studies often use graphical tree representations. Many powerful tree editors are now available, but existing tree visualization tools make little use of meta-information related to the entities under study such as taxonomic descriptions or gene functions that can hardly be encoded within the tree itself (if using popular tree formats). Consequently, a tedious manual analysis and post-processing of the tree graphics are required if one needs to use external information for displaying or investigating trees.
RESULTS: We have developed TreeDyn, a tool using annotations and dynamic graphical methods for editing and analyzing multiple trees. The main features of TreeDyn are 1) the management of multiple windows and multiple trees per window, 2) the export of graphics to several standard file formats with or without HTML encapsulation and a new format called TGF, which enables saving and restoring graphical analysis, 3) the projection of texts or symbols facing leaf labels or linked to nodes, through manual pasting or by using annotation files, 4) the highlight of graphical elements after querying leaf labels (or annotations) or by selection of graphical elements and information extraction, 5) the highlight of targeted trees according to a source tree browsed by the user, 6) powerful scripts for automating repetitive graphical tasks, 7) a command line interpreter enabling the use of TreeDyn through CGI scripts for online building of trees, 8) the inclusion of a library of packages dedicated to specific research fields involving trees.
CONCLUSION: TreeDyn is a tree visualization and annotation tool which includes tools for tree manipulation and annotation and uses meta-information through dynamic graphical operators or scripting to help analyses and annotations of single trees or tree collections.},
	language = {eng},
	journal = {BMC Bioinformatics},
	author = {Chevenet, François and Brun, Christine and Bañuls, Anne-Laure and Jacq, Bernard and Christen, Richard},
	month = oct,
	year = {2006},
	pmid = {17032440},
	pmcid = {PMC1615880},
	keywords = {Computer Graphics, Databases, Genetic, Decision Trees},
	pages = {439}
}


@article{he_evolview_2016,
	title = {Evolview v2: an online visualization and management tool for customized and annotated phylogenetic trees},
	volume = {44},
	issn = {1362-4962},
	shorttitle = {Evolview v2},
	doi = {10.1093/nar/gkw370},
	abstract = {Evolview is an online visualization and management tool for customized and annotated phylogenetic trees. It allows users to visualize phylogenetic trees in various formats, customize the trees through built-in functions and user-supplied datasets and export the customization results to publication-ready figures. Its 'dataset system' contains not only the data to be visualized on the tree, but also 'modifiers' that control various aspects of the graphical annotation. Evolview is a single-page application (like Gmail); its carefully designed interface allows users to upload, visualize, manipulate and manage trees and datasets all in a single webpage. Developments since the last public release include a modern dataset editor with keyword highlighting functionality, seven newly added types of annotation datasets, collaboration support that allows users to share their trees and datasets and various improvements of the web interface and performance. In addition, we included eleven new 'Demo' trees to demonstrate the basic functionalities of Evolview, and five new 'Showcase' trees inspired by publications to showcase the power of Evolview in producing publication-ready figures. Evolview is freely available at: http://www.evolgenius.info/evolview/.},
	language = {eng},
	number = {W1},
	journal = {Nucleic Acids Research},
	author = {He, Zilong and Zhang, Huangkai and Gao, Shenghan and Lercher, Martin J. and Chen, Wei-Hua and Hu, Songnian},
	month = jul,
	year = {2016},
	pmid = {27131786},
	pmcid = {PMC4987921},
	pages = {W236--W241}
}

@article{huson_dendroscope_2012,
	author = {Huson, Daniel H. and Scornavacca, Celine},
	title = {Dendroscope 3: an interactive tool for rooted phylogenetic trees and networks},
	shorttitle = {Dendroscope 3},
	journal = {Systematic Biology},
	year = {2012},
	month = dec,
	volume = {61},
	number = {6},
	pages = {1061--1067},
	issn = {1076-836X},
	doi = {10.1093/sysbio/sys062},
	pmid = {22780991},
	language = {eng},
	keywords = {Classification, Phylogeny, Software},
	abstract = {Dendroscope 3 is a new program for working with rooted phylogenetic trees and networks. It provides a number of methods for drawing and comparing rooted phylogenetic networks, and for computing them from rooted trees. The program can be used interactively or in command-line mode. The program is written in Java, use of the software is free, and installers for all 3 major operating systems can be downloaded from www.dendroscope.org. [Phylogenetic trees; phylogenetic networks; software.].}
}

@article{page_visualizing_2002,
	author = {Page, Roderic D. M.},
	title = {Visualizing phylogenetic trees using {TreeView}},
	journal = {Current Protocols in Bioinformatics},
	year = {2002},
	month = aug,
	volume = {Chapter 6},
	pages = {Unit 6.2},
	issn = {1934-340X},
	doi = {10.1002/0471250953.bi0602s01},
	pmid = {18792942},
	language = {eng},
	keywords = {Algorithms, Computer Graphics, Computer Simulation, Information Storage and Retrieval, Models, Genetic, Phylogeny, Software, User-Computer Interface},
	abstract = {TreeView provides a simple way to view the phylogenetic trees produced by a range of programs, such as PAUP*, PHYLIP, TREE-PUZZLE, and ClustalX. While some phylogenetic programs (such as the Macintosh version of PAUP*) have excellent tree printing facilities, many programs do not have the ability to generate publication quality trees. TreeView addresses this need. The program can read and write a range of tree file formats, display trees in a variety of styles, print trees, and save the tree as a graphic file. Protocols in this unit cover both displaying and printing a tree. Support protocols describe how to download and install TreeView, and how to display bootstrap values in trees generated by ClustalX and PAUP*.}
}

@article{letunic_interactive_2007,
	title = {Interactive {Tree} {Of} {Life} ({iTOL}): an online tool for phylogenetic tree display and annotation},
	volume = {23},
	shorttitle = {Interactive {Tree} {Of} {Life} ({iTOL})},
	url = {http://bioinformatics.oxfordjournals.org/content/23/1/127.abstract},
	doi = {10.1093/bioinformatics/btl529},
	abstract = {Summary: Interactive Tree Of Life (iTOL) is a web-based tool for the display, manipulation and annotation of phylogenetic trees. Trees can be interactively pruned and re-rooted. Various types of data such as genome sizes or protein domain repertoires can be mapped onto the tree. Export to several bitmap and vector graphics formats is supported. Availability: iTOL is available at http://itol.embl.de Contact: bork@embl.de},
	number = {1},
	urldate = {2011-02-28},
	journal = {Bioinformatics},
	author = {Letunic, Ivica and Bork, Peer},
	year = {2007},
	pages = {127--128},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/ZT7CRCTD/Letunic and Bork - Interactive Tree Of Life (iTOL) an online tool fo.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/AJV8UITW/127.html:text/html}
}

@article{yu_ggtree:_2017,
	title = {{ggtree}: an {R} package for visualization and annotation of phylogenetic trees with their covariates and other associated data},
	volume = {8},
	issn = {2041-210X},
	shorttitle = {{ggtree}},
	url = {http://onlinelibrary.wiley.com/doi/10.1111/2041-210X.12628/abstract},
	doi = {10.1111/2041-210X.12628},
	abstract = {* We present an r package, ggtree, which provides programmable visualization and annotation of phylogenetic trees.


* ggtree can read more tree file formats than other softwares, including newick, nexus, NHX, phylip and jplace formats, and support visualization of phylo, multiphylo, phylo4, phylo4d, obkdata and phyloseq tree objects defined in other r packages. It can also extract the tree/branch/node-specific and other data from the analysis outputs of beast, epa, hyphy, paml, phylodog, pplacer, r8s, raxml and revbayes software, and allows using these data to annotate the tree.


* The package allows colouring and annotation of a tree by numerical/categorical node attributes, manipulating a tree by rotating, collapsing and zooming out clades, highlighting user selected clades or operational taxonomic units and exploration of a large tree by zooming into a selected portion.


* A two-dimensional tree can be drawn by scaling the tree width based on an attribute of the nodes. A tree can be annotated with an associated numerical matrix (as a heat map), multiple sequence alignment, subplots or silhouette images.


* The package ggtree is released under the artistic-2.0 license. The source code and documents are freely available through bioconductor (http://www.bioconductor.org/packages/ggtree).},
	language = {en},
	number = {1},
	urldate = {2017-03-07},
	journal = {Methods in Ecology and Evolution},
	author = {Yu, Guangchuang and Smith, David K. and Zhu, Huachen and Guan, Yi and Lam, Tommy Tsan-Yuk},
	month = jan,
	year = {2017},
	keywords = {annotation, bioconductor, Evolution, Phylogeny, r package, visualization},
	pages = {28--36},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/S3H2KHCZ/Yu et al. - 2017 - ggtree an r package for visualization and annotat.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/F5SF96WG/abstract.html:text/html}
}

@article{kumar_mega7:_2016,
	title = {{MEGA7}: {Molecular} {Evolutionary} {Genetics} {Analysis} {Version} 7.0 for {Bigger} {Datasets}},
	volume = {33},
	issn = {0737-4038},
	shorttitle = {{MEGA7}},
	url = {https://academic.oup.com/mbe/article/33/7/1870/2579089/MEGA7-Molecular-Evolutionary-Genetics-Analysis},
	doi = {10.1093/molbev/msw054},
	number = {7},
	urldate = {2017-04-30},
	journal = {Molecular Biology and Evolution},
	author = {Kumar, Sudhir and Stecher, Glen and Tamura, Koichiro},
	month = jul,
	year = {2016},
	pages = {1870--1874},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/4B4H99HC/Kumar et al. - 2016 - MEGA7 Molecular Evolutionary Genetics Analysis Ve.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/IPXDWWFG/msw054.html:text/html}
}

@article{williams_identification_2016,
	title = {Identification of neutral tumor evolution across cancer types},
	volume = {48},
	copyright = {© 2015 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
	issn = {1061-4036},
	url = {http://www.nature.com/ng/journal/v48/n3/full/ng.3489.html},
	doi = {10.1038/ng.3489},
	abstract = {Despite extraordinary efforts to profile cancer genomes, interpreting the vast amount of genomic data in the light of cancer evolution remains challenging. Here we demonstrate that neutral tumor evolution results in a power-law distribution of the mutant allele frequencies reported by next-generation sequencing of tumor bulk samples. We find that the neutral power law fits with high precision 323 of 904 cancers from 14 types and from different cohorts. In malignancies identified as evolving neutrally, all clonal selection seemingly occurred before the onset of cancer growth and not in later-arising subclones, resulting in numerous passenger mutations that are responsible for intratumoral heterogeneity. Reanalyzing cancer sequencing data within the neutral framework allowed the measurement, in each patient, of both the in vivo mutation rate and the order and timing of mutations. This result provides a new way to interpret existing cancer genomic data and to discriminate between functional and non-functional intratumoral heterogeneity.},
	language = {en},
	number = {3},
	urldate = {2017-04-30},
	journal = {Nature Genetics},
	author = {Williams, Marc J. and Werner, Benjamin and Barnes, Chris P. and Graham, Trevor A. and Sottoriva, Andrea},
	month = mar,
	year = {2016},
	keywords = {Cancer, Computational biology and bioinformatics, genetics},
	pages = {238--244},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/THVXP3JD/Williams et al. - 2016 - Identification of neutral tumor evolution across c.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/DP5ECU3P/ng.3489.html:text/html}
}

@article{zou_research_2016,
	author = {Zou, Shimian and Caler, Lis and Colombini-Hatch, Sandra and Glynn, Simone and Srinivas, Pothur},
	title = {Research on the human virome: where are we and what is next},
	shorttitle = {Research on the human virome},
	journal = {Microbiome},
	year = {2016},
	month = jun,
	volume = {4},
	issn = {2049-2618},
	doi = {10.1186/s40168-016-0177-y},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4919837/},
	urldate = {2017-04-30},
	pmid = {27341799},
	pmcid = {PMC4919837},
	abstract = {The National Heart, Lung, and Blood Institute (NHLBI) of the National Institutes of Health convened a Working Group on the Microbiome in Cardiovascular, Pulmonary and Hematologic Health and Diseases from June 25, 2014, to June 26, 2014. The Working Group’s central goal was to define what major microbiome research areas warranted additional study in the context of heart, lung, and blood (HLB) diseases. The Working Group identified studies of the human virome a key priority.},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/M4VQFV2P/Zou et al. - 2016 - Research on the human virome where are we and wha.pdf:application/pdf}
}

@article{issa_big_2014,
	title = {Big data: the next frontier for innovation in therapeutics and healthcare},
	volume = {7},
	issn = {1751-2433},
	shorttitle = {Big data},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4448933/},
	doi = {10.1586/17512433.2014.905201},
	abstract = {Advancements in genomics and personalized medicine not only effect healthcare delivery from patient and provider standpoints, but also reshape biomedical discovery. We are in the era of the “-omics”, wherein an individual’s genome, transcriptome, proteome and metabolome can be scrutinized to the finest resolution to paint a personalized biochemical fingerprint that enables tailored treatments, prognoses, risk factors, etc. Digitization of this information parlays into “big data” informatics-driven evidence-based medical practice. While individualized patient management is a key beneficiary of next-generation medical informatics, this data also harbors a wealth of novel therapeutic discoveries waiting to be uncovered. “Big data” informatics allows for networks-driven systems pharmacodynamics whereby drug information can be coupled to cellular- and organ-level physiology for determining whole-body outcomes. Patient “-omics” data can be integrated for ontology-based data-mining for the discovery of new biological associations and drug targets. Here we highlight the potential of “big data” informatics for clinical pharmacology.},
	number = {3},
	urldate = {2017-04-30},
	journal = {Expert Review of Clinical Pharmacology},
	author = {Issa, Naiem T and Byers, Stephen W and Dakshanamurthy, Sivanesan},
	month = may,
	year = {2014},
	pmid = {24702684},
	pmcid = {PMC4448933},
	pages = {293--298},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/8JJNJ4SE/Issa et al. - 2014 - Big data the next frontier for innovation in ther.pdf:application/pdf}
}

@article{schork_personalized_2015,
	title = {Personalized medicine: {Time} for one-person trials},
	volume = {520},
	shorttitle = {Personalized medicine},
	url = {http://www.nature.com/news/personalized-medicine-time-for-one-person-trials-1.17411},
	doi = {10.1038/520609a},
	abstract = {Precision medicine requires a different type of clinical trial that focuses on individual, not average, responses to therapy, says Nicholas J. Schork.},
	number = {7549},
	urldate = {2017-04-30},
	journal = {Nature},
	author = {Schork, Nicholas J.},
	month = apr,
	year = {2015},
	pages = {609--611},
	file = {Snapshot:/Volumes/HOME/Zotero/storage/CDZQVK8M/personalized-medicine-time-for-one-person-trials-1.html:text/html}
}

@article{gilbert_earth_2014,
	title = {The {Earth} {Microbiome} project: successes and aspirations},
	volume = {12},
	copyright = {2014 Gilbert et al.; licensee BioMed Central},
	issn = {1741-7007},
	shorttitle = {The {Earth} {Microbiome} project},
	url = {http://bmcbiol.biomedcentral.com/articles/10.1186/s12915-014-0069-1},
	doi = {10.1186/s12915-014-0069-1},
	language = {en},
	number = {1},
	urldate = {2017-04-30},
	journal = {BMC Biology},
	author = {Gilbert, Jack A. and Jansson, Janet K. and Knight, Rob},
	month = aug,
	year = {2014},
	pages = {69},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/JM88WRJH/Gilbert et al. - 2014 - The Earth Microbiome project successes and aspira.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/BZISBI3V/s12915-014-0069-1.html:text/html}
}

@incollection{jukes_evolution_1969,
	author = {Jukes, Thomas H. and Cantor, Charles R.},
	title = {Evolution of {Protein} {Molecules}},
	booktitle = {Mammalian {Protein} {Metabolism}},
	volume = {3},
	editor = {Munro, Hamish N.},
	publisher = {Academic Press},
	address = {New York},
	year = {1969},
	pages = {21--132},
	keywords = {compbio, fresco}
}

@article{felsenstein_cases_1978,
	title = {Cases in which {Parsimony} or {Compatibility} {Methods} will be {Positively} {Misleading}},
	volume = {27},
	issn = {1063-5157},
	url = {https://academic.oup.com/sysbio/article-abstract/27/4/401/1734959/Cases-in-which-Parsimony-or-Compatibility-Methods},
	doi = {10.1093/sysbio/27.4.401},
	number = {4},
	urldate = {2017-04-22},
	journal = {Systematic Biology},
	author = {Felsenstein, Joseph},
	month = dec,
	year = {1978},
	pages = {401--410}
}

@article{fitch_toward_1971,
	author = {Fitch, Walter M.},
	title = {Toward {Defining} the {Course} of {Evolution}: {Minimum} {Change} for a {Specific} {Tree} {Topology}},
	shorttitle = {Toward {Defining} the {Course} of {Evolution}},
	journal = {Systematic Zoology},
	year = {1971},
	volume = {20},
	number = {4},
	pages = {406--416},
	issn = {0039-7989},
	doi = {10.2307/2412116},
	url = {http://www.jstor.org/stable/2412116},
	urldate = {2017-04-22},
	abstract = {A method is presented that is asserted to provide all hypothetical ancestral character states that are consistent with describing the descent of the present-day character states in a minimum number of changes of state using a predetermined phylogenetic relationship among the taxa represented. The character states used as examples are the four messenger RNA nucleotides encoding the amino acid sequences of proteins, but the method is general.}
}

@article{rothberg_development_2008,
	title = {The development and impact of 454 sequencing},
	volume = {26},
	copyright = {© 2008 Nature Publishing Group},
	issn = {1087-0156},
	url = {http://www.nature.com/nbt/journal/v26/n10/full/nbt1485.html},
	doi = {10.1038/nbt1485},
	abstract = {The 454 Sequencer has dramatically increased the volume of sequencing conducted by the scientific community and expanded the range of problems that can be addressed by the direct readouts of DNA sequence. Key breakthroughs in the development of the 454 sequencing platform included higher throughput, simplified all in vitro sample preparation and the miniaturization of sequencing chemistries, enabling massively parallel sequencing reactions to be carried out at a scale and cost not previously possible. Together with other recently released next-generation technologies, the 454 platform has started to democratize sequencing, providing individual laboratories with access to capacities that rival those previously found only at a handful of large sequencing centers. Over the past 18 months, 454 sequencing has led to a better understanding of the structure of the human genome, allowed the first non-Sanger sequence of an individual human and opened up new approaches to identify small RNAs. To make next-generation technologies more widely accessible, they must become easier to use and less costly. In the longer term, the principles established by 454 sequencing might reduce cost further, potentially enabling personalized genomics.},
	language = {en},
	number = {10},
	urldate = {2017-04-21},
	journal = {Nature Biotechnology},
	author = {Rothberg, Jonathan M. and Leamon, John H.},
	month = oct,
	year = {2008},
	pages = {1117--1124},
	file = {Snapshot:/Volumes/HOME/Zotero/storage/ZCVMDPQQ/nbt1485.html:text/html}
}

@article{ronaghi_real-time_1996,
	author = {Ronaghi, M. and Karamohamed, S. and Pettersson, B. and Uhlén, M. and Nyrén, P.},
	title = {Real-time {DNA} sequencing using detection of pyrophosphate release},
	journal = {Analytical Biochemistry},
	year = {1996},
	month = nov,
	volume = {242},
	number = {1},
	pages = {84--89},
	issn = {0003-2697},
	doi = {10.1006/abio.1996.0432},
	pmid = {8923969},
	language = {eng},
	keywords = {Adenosine Triphosphate, Base Sequence, Deoxyadenine Nucleotides, Diphosphates, DNA, DNA-Directed DNA Polymerase, DNA, Single-Stranded, Luciferases, Luminescent Measurements, Molecular Sequence Data, Sequence Analysis, DNA, Thionucleotides},
	abstract = {An approach for real-time DNA sequencing without the need for electrophoresis has been developed. The approach relies on the detection of DNA polymerase activity by an enzymatic luminometric inorganic pyrophosphate (PPi) detection assay (ELIDA) (Nyrén, P. (1987) Anal. Biochem. 167, 235-238). The PPi formed in the DNA polymerase reaction is converted to ATP by ATP sulfurylase and the ATP production is continuously monitored by the firefly luciferase. In the sequencing procedure, immobilized single-stranded template was used in a repeated cycle of deoxynucleotide extension. Real-time signals in the ELIDA, proportional to the amount of incorporated nucleotide, were observed when complementary bases were incorporated. An increased signal-to-noise ratio was obtained by substitution of deoxyadenosine alpha-thiotriphosphate (dATP alpha S) for the natural deoxyadenosine triphosphate, dATP alpha S is efficiently used by the DNA polymerase, but is not recognized by the luciferase. As a model, 15 bases of a single-stranded PCR product were sequenced. The possibility for parallel processing of many samples in an automated manner is discussed.}
}

@article{guindon_estimating_2009,
	author = {Guindon, Stéphane and Delsuc, Frédéric and Dufayard, Jean-François and Gascuel, Olivier},
	title = {Estimating maximum likelihood phylogenies with {PhyML}},
	journal = {Methods in Molecular Biology (Clifton, N.J.)},
	year = {2009},
	volume = {537},
	pages = {113--137},
	issn = {1064-3745},
	doi = {10.1007/978-1-59745-251-9_6},
	pmid = {19378142},
	language = {eng},
	keywords = {Algorithms, Base Sequence, Likelihood Functions, Molecular Sequence Data, Phylogeny, Software, User-Computer Interface},
	abstract = {Our understanding of the origins, the functions and/or the structures of biological sequences strongly depends on our ability to decipher the mechanisms of molecular evolution. These complex processes can be described through the comparison of homologous sequences in a phylogenetic framework. Moreover, phylogenetic inference provides sound statistical tools to exhibit the main features of molecular evolution from the analysis of actual sequences. This chapter focuses on phylogenetic tree estimation under the maximum likelihood (ML) principle. Phylogenies inferred under this probabilistic criterion are usually reliable and important biological hypotheses can be tested through the comparison of different models. Estimating ML phylogenies is computationally demanding, and careful examination of the results is warranted. This chapter focuses on PhyML, a software that implements recent ML phylogenetic methods and algorithms. We illustrate the strengths and pitfalls of this program through the analysis of a real data set. PhyML v3.0 is available from (http://atgc\_montpellier.fr/phyml/).}
}

@article{camacho_blast_2009,
	author = {Camacho, Christiam and Coulouris, George and Avagyan, Vahram and Ma, Ning and Papadopoulos, Jason and Bealer, Kevin and Madden, Thomas L.},
	title = {{BLAST}+: architecture and applications},
	shorttitle = {{BLAST}+},
	journal = {BMC Bioinformatics},
	year = {2009},
	volume = {10},
	pages = {421},
	issn = {1471-2105},
	doi = {10.1186/1471-2105-10-421},
	url = {http://dx.doi.org/10.1186/1471-2105-10-421},
	urldate = {2017-04-15},
	abstract = {Sequence similarity searching is a very important bioinformatics task. While Basic Local Alignment Search Tool (BLAST) outperforms exact methods through its use of heuristics, the speed of the current BLAST software is suboptimal for very long queries or database sequences. There are also some shortcomings in the user-interface of the current command-line applications.},
	annote = {Pages 421 in PDF},
	file = {Snapshot:/Volumes/HOME/Zotero/storage/PMCQERCE/1471-2105-10-421.html:text/html}
}

@article{earl_assemblathon_2011,
	title = {Assemblathon 1: {A} competitive assessment of de novo short read assembly methods},
	volume = {21},
	issn = {1088-9051, 1549-5469},
	shorttitle = {Assemblathon 1},
	url = {http://genome.cshlp.org/content/21/12/2224},
	doi = {10.1101/gr.126599.111},
	abstract = {Low-cost short read sequencing technology has revolutionized genomics, though it is only just becoming practical for the high-quality de novo assembly of a novel large genome. We describe the Assemblathon 1 competition, which aimed to comprehensively assess the state of the art in de novo assembly methods when applied to current sequencing technologies. In a collaborative effort, teams were asked to assemble a simulated Illumina HiSeq data set of an unknown, simulated diploid genome. A total of 41 assemblies from 17 different groups were received. Novel haplotype aware assessments of coverage, contiguity, structure, base calling, and copy number were made. We establish that within this benchmark: (1) It is possible to assemble the genome to a high level of coverage and accuracy, and that (2) large differences exist between the assemblies, suggesting room for further improvements in current methods. The simulated benchmark, including the correct answer, the assemblies, and the code that was used to evaluate the assemblies is now public and freely available from http://www.assemblathon.org/.},
	language = {en},
	number = {12},
	urldate = {2017-04-15},
	journal = {Genome Research},
	author = {Earl, Dent and Bradnam, Keith and St. John, John and Darling, Aaron and Lin, Dawei and Fass, Joseph and Yu, Hung On Ken and Buffalo, Vince and Zerbino, Daniel R. and Diekhans, Mark and Nguyen, Ngan and Ariyaratne, Pramila Nuwantha and Sung, Wing-Kin and Ning, Zemin and Haimel, Matthias and Simpson, Jared T. and Fonseca, Nuno A. and Birol, İnanç and Docking, T. Roderick and Ho, Isaac Y. and Rokhsar, Daniel S. and Chikhi, Rayan and Lavenier, Dominique and Chapuis, Guillaume and Naquin, Delphine and Maillet, Nicolas and Schatz, Michael C. and Kelley, David R. and Phillippy, Adam M. and Koren, Sergey and Yang, Shiaw-Pyng and Wu, Wei and Chou, Wen-Chi and Srivastava, Anuj and Shaw, Timothy I. and Ruby, J. Graham and Skewes-Cox, Peter and Betegon, Miguel and Dimon, Michelle T. and Solovyev, Victor and Seledtsov, Igor and Kosarev, Petr and Vorobyev, Denis and Ramirez-Gonzalez, Ricardo and Leggett, Richard and MacLean, Dan and Xia, Fangfang and Luo, Ruibang and Li, Zhenyu and Xie, Yinlong and Liu, Binghang and Gnerre, Sante and MacCallum, Iain and Przybylski, Dariusz and Ribeiro, Filipe J. and Yin, Shuangye and Sharpe, Ted and Hall, Giles and Kersey, Paul J. and Durbin, Richard and Jackman, Shaun D. and Chapman, Jarrod A. and Huang, Xiaoqiu and DeRisi, Joseph L. and Caccamo, Mario and Li, Yingrui and Jaffe, David B. and Green, Richard E. and Haussler, David and Korf, Ian and Paten, Benedict},
	month = dec,
	year = {2011},
	pmid = {21926179},
	pages = {2224--2241},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/4SMNINKA/Earl et al. - 2011 - Assemblathon 1 A competitive assessment of de nov.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/SWR6FV85/2224.html:text/html}
}

@article{filipski_phylogenetic_2015,
	title = {Phylogenetic placement of metagenomic reads using the minimum evolution principle},
	volume = {16},
	issn = {1471-2164},
	doi = {10.1186/1471-2164-16-S1-S13},
	abstract = {BACKGROUND: A central problem of computational metagenomics is determining the correct placement into an existing phylogenetic tree of individual reads (nucleotide sequences of varying lengths, ranging from hundreds to thousands of bases) obtained using next-generation sequencing of DNA samples from a mixture of known and unknown species. Correct placement allows us to easily identify or classify the sequences in the sample as to taxonomic position or function.
RESULTS: Here we propose a novel method (PhyClass), based on the Minimum Evolution (ME) phylogenetic inference criterion, for determining the appropriate phylogenetic position of each read. Without using heuristics, the new approach efficiently finds the optimal placement of the unknown read in a reference phylogenetic tree given a sequence alignment for the taxa in the tree. In short, the total resulting branch length for the tree is computed for every possible placement of the unknown read and the placement that gives the smallest value for this total is the best (optimal) choice. By taking advantage of computational efficiencies and mathematical formulations, we are able to find the true optimal ME placement for each read in the phylogenetic tree. Using computer simulations, we assessed the accuracy of the new approach for different read lengths over a variety of data sets and phylogenetic trees. We found the accuracy of the new method to be good and comparable to existing Maximum Likelihood (ML) approaches.
CONCLUSIONS: In particular, we found that the consensus assignments based on ME and ML approaches are more correct than either method individually. This is true even when the statistical support for read assignments was low, which is inevitable given that individual reads are often short and come from only one gene.},
	language = {eng},
	number = {S1},
	journal = {BMC Genomics},
	author = {Filipski, Alan and Tamura, Koichiro and Billing-Ross, Paul and Murillo, Oscar and Kumar, Sudhir},
	month = dec,
	year = {2015},
	pmid = {25776726},
	pages = {S13},
	file = {1471-2164-16-S1-S13.pdf:/Volumes/HOME/Zotero/storage/RMWJXDVD/1471-2164-16-S1-S13.pdf:application/pdf}
}

@article{magoc_gage-b_2013,
	title = {{GAGE}-{B}: an evaluation of genome assemblers for bacterial organisms},
	volume = {29},
	issn = {1367-4811},
	shorttitle = {{GAGE}-{B}},
	doi = {10.1093/bioinformatics/btt273},
	abstract = {MOTIVATION: A large and rapidly growing number of bacterial organisms have been sequenced by the newest sequencing technologies. Cheaper and faster sequencing technologies make it easy to generate very high coverage of bacterial genomes, but these advances mean that DNA preparation costs can exceed the cost of sequencing for small genomes. The need to contain costs often results in the creation of only a single sequencing library, which in turn introduces new challenges for genome assembly methods.
RESULTS: We evaluated the ability of multiple genome assembly programs to assemble bacterial genomes from a single, deep-coverage library. For our comparison, we chose bacterial species spanning a wide range of GC content and measured the contiguity and accuracy of the resulting assemblies. We compared the assemblies produced by this very high-coverage, one-library strategy to the best assemblies created by two-library sequencing, and we found that remarkably good bacterial assemblies are possible with just one library. We also measured the effect of read length and depth of coverage on assembly quality and determined the values that provide the best results with current algorithms.
CONTACT: salzberg@jhu.edu
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
	language = {eng},
	number = {14},
	journal = {Bioinformatics},
	author = {Magoc, Tanja and Pabinger, Stephan and Canzar, Stefan and Liu, Xinyue and Su, Qi and Puiu, Daniela and Tallon, Luke J. and Salzberg, Steven L.},
	month = jul,
	year = {2013},
	pmid = {23665771},
	pmcid = {PMC3702249},
	keywords = {Algorithms, Gene Library, Genome, Bacterial, Genomics, Sequence Analysis, DNA, Software},
	pages = {1718--1725},
	file = {btt273.pdf:/Volumes/HOME/Zotero/storage/Q6FFQBJM/btt273.pdf:application/pdf}
}

@article{zerbino_velvet_2008,
	author = {Zerbino, Daniel R. and Birney, Ewan},
	title = {Velvet: {Algorithms} for de novo short read assembly using de {Bruijn} graphs},
	shorttitle = {Velvet},
	journal = {Genome Research},
	year = {2008},
	month = may,
	volume = {18},
	number = {5},
	pages = {821--829},
	issn = {1088-9051, 1549-5469},
	doi = {10.1101/gr.074492.107},
	url = {http://genome.cshlp.org/content/18/5/821},
	urldate = {2017-04-11},
	pmid = {18349386},
	language = {en},
	abstract = {We have developed a new set of algorithms, collectively called “Velvet,” to manipulate de Bruijn graphs for genomic sequence assembly. A de Bruijn graph is a compact representation based on short words (k-mers) that is ideal for high coverage, very short read (25–50 bp) data sets. Applying Velvet to very short reads and paired-ends information only, one can produce contigs of significant length, up to 50-kb N50 length in simulations of prokaryotic data and 3-kb N50 on simulated mammalian BACs. When applied to real Solexa data sets without read pairs, Velvet generated contigs of ∼8 kb in a prokaryote and 2 kb in a mammalian BAC, in close agreement with our simulated results without read-pair information. Velvet represents a new approach to assembly that can leverage very short reads in combination with read pairs to produce useful assemblies.},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/C6PPUM9S/Zerbino and Birney - 2008 - Velvet Algorithms for de novo short read assembly.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/5FGUHJRP/821.html:text/html}
}

@article{li_novo_2010,
	author = {Li, Ruiqiang and Zhu, Hongmei and Ruan, Jue and Qian, Wubin and Fang, Xiaodong and Shi, Zhongbin and Li, Yingrui and Li, Shengting and Shan, Gao and Kristiansen, Karsten and Li, Songgang and Yang, Huanming and Wang, Jian and Wang, Jun},
	title = {De novo assembly of human genomes with massively parallel short read sequencing},
	journal = {Genome Research},
	year = {2010},
	month = feb,
	volume = {20},
	number = {2},
	pages = {265--272},
	issn = {1088-9051, 1549-5469},
	doi = {10.1101/gr.097261.109},
	url = {http://genome.cshlp.org/content/20/2/265},
	urldate = {2017-04-11},
	pmid = {20019144},
	language = {en},
	abstract = {Next-generation massively parallel DNA sequencing technologies provide ultrahigh throughput at a substantially lower unit data cost; however, the data are very short read length sequences, making de novo assembly extremely challenging. Here, we describe a novel method for de novo assembly of large genomes from short read sequences. We successfully assembled both the Asian and African human genome sequences, achieving an N50 contig size of 7.4 and 5.9 kilobases (kb) and scaffold of 446.3 and 61.9 kb, respectively. The development of this de novo short read assembly method creates new opportunities for building reference sequences and carrying out accurate analyses of unexplored genomes in a cost-effective way.},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/P6WNUR9P/Li et al. - 2010 - De novo assembly of human genomes with massively p.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/CKJRVFT6/265.html:text/html}
}

@article{butler_allpaths_2008,
	author = {Butler, Jonathan and MacCallum, Iain and Kleber, Michael and Shlyakhter, Ilya A. and Belmonte, Matthew K. and Lander, Eric S. and Nusbaum, Chad and Jaffe, David B.},
	title = {{ALLPATHS}: {De} novo assembly of whole-genome shotgun microreads},
	shorttitle = {{ALLPATHS}},
	journal = {Genome Research},
	volume = {18},
	number = {5},
	pages = {810--820},
	month = may,
	year = {2008},
	issn = {1088-9051},
	doi = {10.1101/gr.7337908},
	pmid = {18340039},
	pmcid = {PMC2336810},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2336810/},
	urldate = {2017-04-11},
	abstract = {New DNA sequencing technologies deliver data at dramatically lower costs but demand new analytical methods to take full advantage of the very short reads that they produce. We provide an initial, theoretical solution to the challenge of de novo assembly from whole-genome shotgun “microreads.” For 11 genomes of sizes up to 39 Mb, we generated high-quality assemblies from 80× coverage by paired 30-base simulated reads modeled after real Illumina-Solexa reads. The bacterial genomes of Campylobacter jejuni and Escherichia coli assemble optimally, yielding single perfect contigs, and larger genomes yield assemblies that are highly connected and accurate. Assemblies are presented in a graph form that retains intrinsic ambiguities such as those arising from polymorphism, thereby providing information that has been absent from previous genome assemblies. For both C. jejuni and E. coli, this assembly graph is a single edge encompassing the entire genome. Larger genomes produce more complicated graphs, but the vast majority of the bases in their assemblies are present in long edges that are nearly always perfect. We describe a general method for genome assembly that can be applied to all types of DNA sequence data, not only short read data, but also conventional sequence reads.},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/HM8KRBUH/Butler et al. - 2008 - ALLPATHS De novo assembly of whole-genome shotgun.pdf:application/pdf}
}

@article{simpson_efficient_2012,
	author = {Simpson, Jared T. and Durbin, Richard},
	title = {Efficient de novo assembly of large genomes using compressed data structures},
	journal = {Genome Research},
	volume = {22},
	number = {3},
	pages = {549--556},
	month = mar,
	year = {2012},
	issn = {1549-5469},
	doi = {10.1101/gr.126953.111},
	pmid = {22156294},
	pmcid = {PMC3290790},
	language = {eng},
	keywords = {Algorithms, Animals, Computational Biology, Data Compression, Genomics, Humans, Internet, Reproducibility of Results, Sequence Analysis, DNA, Software},
	abstract = {De novo genome sequence assembly is important both to generate new sequence assemblies for previously uncharacterized genomes and to identify the genome sequence of individuals in a reference-unbiased way. We present memory efficient data structures and algorithms for assembly using the FM-index derived from the compressed Burrows-Wheeler transform, and a new assembler based on these called SGA (String Graph Assembler). We describe algorithms to error-correct, assemble, and scaffold large sets of sequence data. SGA uses the overlap-based string graph model of assembly, unlike most de novo assemblers that rely on de Bruijn graphs, and is simply parallelizable. We demonstrate the error correction and assembly performance of SGA on 1.2 billion sequence reads from a human genome, which we are able to assemble using 54 GB of memory. The resulting contigs are highly accurate and contiguous, while covering 95\% of the reference genome (excluding contigs {\textless}200 bp in length). Because of the low memory requirements and parallelization without requiring inter-process communication, SGA provides the first practical assembler to our knowledge for a mammalian-sized genome on a low-end computing cluster.}
}

@article{bankevich_spades_2012,
	author = {Bankevich, Anton and Nurk, Sergey and Antipov, Dmitry and Gurevich, Alexey A. and Dvorkin, Mikhail and Kulikov, Alexander S. and Lesin, Valery M. and Nikolenko, Sergey I. and Pham, Son and Prjibelski, Andrey D. and Pyshkin, Alexey V. and Sirotkin, Alexander V. and Vyahhi, Nikolay and Tesler, Glenn and Alekseyev, Max A. and Pevzner, Pavel A.},
	title = {{SPAdes}: {A} {New} {Genome} {Assembly} {Algorithm} and {Its} {Applications} to {Single}-{Cell} {Sequencing}},
	shorttitle = {{SPAdes}},
	journal = {Journal of Computational Biology},
	volume = {19},
	number = {5},
	pages = {455--477},
	month = may,
	year = {2012},
	issn = {1066-5277},
	doi = {10.1089/cmb.2012.0021},
	pmid = {22506599},
	pmcid = {PMC3342519},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3342519/},
	urldate = {2017-04-11},
	abstract = {The lion's share of bacteria in various environments cannot be cloned in the laboratory and thus cannot be sequenced using existing technologies. A major goal of single-cell genomics is to complement gene-centric metagenomic data with whole-genome assemblies of uncultivated organisms. Assembly of single-cell data is challenging because of highly non-uniform read coverage as well as elevated levels of sequencing errors and chimeric reads. We describe SPAdes, a new assembler for both single-cell and standard (multicell) assembly, and demonstrate that it improves on the recently released E+V−SC assembler (specialized for single-cell data) and on popular assemblers Velvet and SoapDeNovo (for multicell data). SPAdes generates single-cell assemblies, providing information about genomes of uncultivatable bacteria that vastly exceeds what may be obtained via traditional metagenomics studies. SPAdes is available online (http://bioinf.spbau.ru/spades). It is distributed as open source software.},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/I2XB3E46/Bankevich et al. - 2012 - SPAdes A New Genome Assembly Algorithm and Its Ap.pdf:application/pdf}
}

@article{sumner_is_2012,
	author = {Sumner, Jeremy G. and Jarvis, Peter D. and Fernández-Sánchez, Jesús and Kaine, Bodie T. and Woodhams, Michael D. and Holland, Barbara R.},
	title = {Is the general time-reversible model bad for molecular phylogenetics?},
	journal = {Systematic Biology},
	volume = {61},
	number = {6},
	pages = {1069--1074},
	month = dec,
	year = {2012},
	issn = {1076-836X},
	doi = {10.1093/sysbio/sys042},
	pmid = {22442193},
	language = {eng},
	keywords = {Classification, Models, Theoretical, Phylogeny}
}

@article{huelsenbeck_empirical_2001,
	author = {Huelsenbeck, J. P. and Bollback, J. P.},
	title = {Empirical and hierarchical {Bayesian} estimation of ancestral states},
	journal = {Systematic Biology},
	volume = {50},
	number = {3},
	pages = {351--366},
	month = jun,
	year = {2001},
	issn = {1063-5157},
	pmid = {12116580},
	language = {eng},
	keywords = {Animals, Bayes Theorem, DNA, Humans, Likelihood Functions, Markov Chains, Models, Genetic, Monte Carlo Method, Phylogeny},
	abstract = {Several methods have been proposed to infer the states at the ancestral nodes on a phylogeny. These methods assume a specific tree and set of branch lengths when estimating the ancestral character state. Inferences of the ancestral states, then, are conditioned on the tree and branch lengths being true. We develop a hierarchical Bayes method for inferring the ancestral states on a tree. The method integrates over uncertainty in the tree, branch lengths, and substitution model parameters by using Markov chain Monte Carlo. We compare the hierarchical Bayes inferences of ancestral states with inferences of ancestral states made under the assumption that a specific tree is correct. We find that the methods are correlated, but that accommodating uncertainty in parameters of the phylogenetic model can make inferences of ancestral states even more uncertain than they would be in an empirical Bayes analysis.}
}

@article{rannala_probability_1996,
	author = {Rannala, B. and Yang, Z.},
	title = {Probability distribution of molecular evolutionary trees: a new method of phylogenetic inference},
	shorttitle = {Probability distribution of molecular evolutionary trees},
	journal = {Journal of Molecular Evolution},
	volume = {43},
	number = {3},
	pages = {304--311},
	month = sep,
	year = {1996},
	issn = {0022-2844},
	pmid = {8703097},
	language = {eng},
	keywords = {Animals, Bayes Theorem, Death, Evolution, Molecular, Female, Globins, Gorilla gorilla, Humans, Labor, Obstetric, Markov Chains, Mitochondria, Models, Genetic, Models, Statistical, Pan troglodytes, Phylogeny, Pongo pygmaeus, Pregnancy, Probability, Pseudogenes, RNA},
	abstract = {A new method is presented for inferring evolutionary trees using nucleotide sequence data. The birth-death process is used as a model of speciation and extinction to specify the prior distribution of phylogenies and branching times. Nucleotide substitution is modeled by a continuous-time Markov process. Parameters of the branching model and the substitution model are estimated by maximum likelihood. The posterior probabilities of different phylogenies are calculated and the phylogeny with the highest posterior probability is chosen as the best estimate of the evolutionary relationship among species. We refer to this as the maximum posterior probability (MAP) tree. The posterior probability provides a natural measure of the reliability of the estimated phylogeny. Two example data sets are analyzed to infer the phylogenetic relationship of human, chimpanzee, gorilla, and orangutan. The best trees estimated by the new method are the same as those from the maximum likelihood analysis of separate topologies, but the posterior probabilities are quite different from the bootstrap proportions. The results of the method are found to be insensitive to changes in the rate parameter of the branching process.}
}

@article{felsenstein_evolutionary_1981,
	title = {Evolutionary trees from {DNA} sequences: a maximum likelihood approach},
	volume = {17},
	issn = {0022-2844},
	shorttitle = {Evolutionary trees from {DNA} sequences},
	abstract = {The application of maximum likelihood techniques to the estimation of evolutionary trees from nucleic acid sequence data is discussed. A computationally feasible method for finding such maximum likelihood estimates is developed, and a computer program is available. This method has advantages over the traditional parsimony algorithms, which can give misleading results if rates of evolution differ in different lineages. It also allows the testing of hypotheses about the constancy of evolutionary rates by likelihood ratio tests, and gives rough indication of the error of the estimate of the tree.},
	language = {eng},
	number = {6},
	journal = {Journal of Molecular Evolution},
	author = {Felsenstein, J.},
	year = {1981},
	pmid = {7288891},
	keywords = {Base Sequence, Biological Evolution, Computers, DNA, Mathematics, Models, Biological, Phylogeny},
	pages = {368--376}
}

@article{yang_maximum_1994,
	author = {Yang, Z.},
	title = {Maximum likelihood phylogenetic estimation from {DNA} sequences with variable rates over sites: approximate methods},
	shorttitle = {Maximum likelihood phylogenetic estimation from {DNA} sequences with variable rates over sites},
	journal = {Journal of Molecular Evolution},
	volume = {39},
	number = {3},
	pages = {306--314},
	month = sep,
	year = {1994},
	issn = {0022-2844},
	pmid = {7932792},
	language = {eng},
	keywords = {Animals, DNA, Mitochondrial, Genetic Variation, Globins, Humans, Likelihood Functions, Mammals, Models, Genetic, Phylogeny, Point Mutation, Primates, RNA, Ribosomal},
	abstract = {Two approximate methods are proposed for maximum likelihood phylogenetic estimation, which allow variable rates of substitution across nucleotide sites. Three data sets with quite different characteristics were analyzed to examine empirically the performance of these methods. The first, called the "discrete gamma model," uses several categories of rates to approximate the gamma distribution, with equal probability for each category. The mean of each category is used to represent all the rates falling in the category. The performance of this method is found to be quite good, and four such categories appear to be sufficient to produce both an optimum, or near-optimum fit by the model to the data, and also an acceptable approximation to the continuous distribution. The second method, called "fixed-rates model", classifies sites into several classes according to their rates predicted assuming the star tree. Sites in different classes are then assumed to be evolving at these fixed rates when other tree topologies are evaluated. Analyses of the data sets suggest that this method can produce reasonable results, but it seems to share some properties of a least-squares pairwise comparison; for example, interior branch lengths in nonbest trees are often found to be zero. The computational requirements of the two methods are comparable to that of Felsenstein's (1981, J Mol Evol 17:368-376) model, which assumes a single rate for all the sites.}
}

@article{shoemaker_evidence_1989,
	title = {Evidence from nuclear sequences that invariable sites should be considered when sequence divergence is calculated},
	volume = {6},
	issn = {0737-4038},
	abstract = {It has long been known, from the distribution of multiple amino acid replacements, that not all amino acids of a sequence are replaceable. More recently, the phenomenon was observed at the nucleotide level in mitochondrial DNA even after allowing for different rates of transition and transversion substitutions. We have extended the search to globin gene sequences from various organisms, with the following results: (1) Nearly every data set showed evidence of invariable nucleotide positions. (2) In all data sets, substitution rates of transversions and transitions were never in the ratio of 2/1, and rarely was the ratio even constant. (3) Only rarely (e.g., the third codon position of beta hemoglobins) was it possible to fit the data set solely by making allowance for the number of invariable positions and for the relative rates of transversion and transition substitutions. (4) For one data set (the second codon position of beta hemoglobins) we were able to simulate the observed data by making the allowance in (3) and having the set of covariotides (concomitantly variable nucleotides) be small in number and be turned over in a stochastic manner with a probability that was appreciable. (5) The fit in the latter case suggests, if the assumptions are correct and at all common, that current procedures for estimating the total number of nucleotide substitutions in two genes since their divergence from their common ancestor could be low by as much as an order of magnitude. (6) The fact that only a small fraction of the nucleotide positions differ is no guarantee that one is not seriously underestimating the total amount of divergence (substitutions). (7) Most data sets are so heterogeneous in their number of transition and transversion differences that none of the current models of nucleotide substitution seem to fit them even after (a) segregation of coding from noncoding sequences and (b) splitting of the codon into three subsets by codon position. (8) These frequently occurring problems cannot be seen unless several reasonably divergent orthologous genes are examined together.},
	language = {eng},
	number = {3},
	journal = {Molecular Biology and Evolution},
	author = {Shoemaker, J. S. and Fitch, W. M.},
	month = may,
	year = {1989},
	pmid = {2622335},
	keywords = {Base Sequence, DNA, Female, Genes, Genetic Variation, Globins, Male, Sequence Homology, Nucleic Acid},
	pages = {270--289}
}

@article{arenas_trends_2015,
	title = {Trends in substitution models of molecular evolution},
	volume = {6},
	issn = {1664-8021},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4620419/},
	doi = {10.3389/fgene.2015.00319},
	abstract = {Substitution models of evolution describe the process of genetic variation through fixed mutations and constitute the basis of the evolutionary analysis at the molecular level. Almost 40 years after the development of first substitution models, highly sophisticated, and data-specific substitution models continue emerging with the aim of better mimicking real evolutionary processes. Here I describe current trends in substitution models of DNA, codon and amino acid sequence evolution, including advantages and pitfalls of the most popular models. The perspective concludes that despite the large number of currently available substitution models, further research is required for more realistic modeling, especially for DNA coding and amino acid data. Additionally, the development of more accurate complex models should be coupled with new implementations and improvements of methods and frameworks for substitution model selection and downstream evolutionary analysis.},
	urldate = {2017-04-22},
	journal = {Frontiers in Genetics},
	author = {Arenas, Miguel},
	month = oct,
	year = {2015},
	pmid = {26579193},
	pmcid = {PMC4620419},
	pages = {319},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/ZAAG5KMT/Arenas - 2015 - Trends in substitution models of molecular evoluti.pdf:application/pdf}
}

@article{lemmon_importance_2004,
	title = {The importance of proper model assumption in {Bayesian} phylogenetics},
	volume = {53},
	issn = {1063-5157},
	doi = {10.1080/10635150490423520},
	abstract = {We studied the importance of proper model assumption in the context of Bayesian phylogenetics by examining {\textgreater}5,000 Bayesian analyses and six nested models of nucleotide substitution. Model misspecification can strongly bias bipartition posterior probability estimates. These biases were most pronounced when rate heterogeneity was ignored. The type of bias seen at a particular bipartition appeared to be strongly influenced by the lengths of the branches surrounding that bipartition. In the Felsenstein zone, posterior probability estimates of bipartitions were biased when the assumed model was underparameterized but were unbiased when the assumed model was overparameterized. For the inverse Felsenstein zone, however, both underparameterization and overparameterization led to biased bipartition posterior probabilities, although the bias caused by overparameterization was less pronounced and disappeared with increased sequence length. Model parameter estimates were also affected by model misspecification. Underparameterization caused a bias in some parameter estimates, such as branch lengths and the gamma shape parameter, whereas overparameterization caused a decrease in the precision of some parameter estimates. We caution researchers to assure that the most appropriate model is assumed by employing both a priori model choice methods and a posteriori model adequacy tests.},
	language = {eng},
	number = {2},
	journal = {Systematic Biology},
	author = {Lemmon, Alan R. and Moriarty, Emily C.},
	month = apr,
	year = {2004},
	pmid = {15205052},
	keywords = {Bayes Theorem, Computer Simulation, Evolution, Molecular, Models, Genetic, Models, Statistical, Phylogeny, Research Design},
	pages = {265--277}
}

@article{trifonov_geographic_2009,
	title = {Geographic {Dependence}, {Surveillance}, and {Origins} of the 2009 {Influenza} {A} ({H1N1}) {Virus}},
	volume = {361},
	issn = {0028-4793},
	url = {http://dx.doi.org/10.1056/NEJMp0904572},
	doi = {10.1056/NEJMp0904572},
	abstract = {In April 2009, a new strain of human H1N1 influenza A virus was identified in Mexico. According to the World Health Organization (www.who.int/csr/don/2009\_05\_25), as of May 25, 2009, the virus had spread to 43 countries, with 12,515 reported cases and 91 associated deaths, and it has been assessed as having pandemic potential. Genomic analysis of the 2009 influenza A (H1N1) virus in humans indicates that it is closely related to common reassortant swine influenza A viruses isolated in North America, Europe, and Asia (Figure 1). The segments coding for the polymerase complex, hemagglutinin, nuclear protein, and nonstructural proteins show . . .},
	number = {2},
	urldate = {2017-04-07},
	journal = {New England Journal of Medicine},
	author = {Trifonov, Vladimir and Khiabanian, Hossein and Rabadan, Raul},
	month = jul,
	year = {2009},
	pmid = {19474418},
	pages = {115--119},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/AN7V5UWU/Trifonov et al. - 2009 - Geographic Dependence, Surveillance, and Origins o.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/D6D8ADFB/NEJMp0904572.html:text/html}
}

@article{ross_characterizing_2013,
	title = {Characterizing and measuring bias in sequence data},
	volume = {14},
	copyright = {2013 Ross et al.; licensee BioMed Central Ltd.},
	issn = {1474-760X},
	url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb-2013-14-5-r51},
	doi = {10.1186/gb-2013-14-5-r51},
	abstract = {DNA sequencing technologies deviate from the ideal uniform distribution of reads. These biases impair scientific and medical applications. Accordingly, we have developed computational methods for discovering, describing and measuring bias. We applied these methods to the Illumina, Ion Torrent, Pacific Biosciences and Complete Genomics sequencing platforms, using data from human and from a set of microbes with diverse base compositions. As in previous work, library construction conditions significantly influence sequencing bias. Pacific Biosciences coverage levels are the least biased, followed by Illumina, although all technologies exhibit error-rate biases in high- and low-GC regions and at long homopolymer runs. The GC-rich regions prone to low coverage include a number of human promoters, so we therefore catalog 1,000 that were exceptionally resistant to sequencing. Our results indicate that combining data from two technologies can reduce coverage bias if the biases in the component technologies are complementary and of similar magnitude. Analysis of Illumina data representing 120-fold coverage of a well-studied human sample reveals that 0.20\% of the autosomal genome was covered at less than 10\% of the genome-wide average. Excluding locations that were similar to known bias motifs or likely due to sample-reference variations left only 0.045\% of the autosomal genome with unexplained poor coverage. The assays presented in this paper provide a comprehensive view of sequencing bias, which can be used to drive laboratory improvements and to monitor production processes. Development guided by these assays should result in improved genome assemblies and better coverage of biologically important loci.},
	language = {en},
	number = {5},
	urldate = {2017-03-29},
	journal = {Genome Biology},
	author = {Ross, Michael G. and Russ, Carsten and Costello, Maura and Hollinger, Andrew and Lennon, Niall J. and Hegarty, Ryan and Nusbaum, Chad and Jaffe, David B.},
	month = may,
	year = {2013},
	pages = {R51},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/KBTACC6G/Ross et al. - 2013 - Characterizing and measuring bias in sequence data.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/KDWMZQSW/gb-2013-14-5-r51.html:text/html}
}

@article{glenn_field_2011,
	title = {Field guide to next-generation {DNA} sequencers},
	volume = {11},
	issn = {1755-0998},
	url = {http://onlinelibrary.wiley.com/doi/10.1111/j.1755-0998.2011.03024.x/abstract},
	doi = {10.1111/j.1755-0998.2011.03024.x},
	abstract = {The diversity of available 2nd and 3rd generation DNA sequencing platforms is increasing rapidly. Costs for these systems range from {\textless}\$100 000 to more than \$1 000 000, with instrument run times ranging from minutes to weeks. Extensive trade-offs exist among these platforms. I summarize the major characteristics of each commercially available platform to enable direct comparisons. In terms of cost per megabase (Mb) of sequence, the Illumina and SOLiD platforms are clearly superior (≤\$0.10/Mb vs. {\textgreater}\$10/Mb for 454 and some Ion Torrent chips). In terms of cost per nonmultiplexed sample and instrument run time, the Pacific Biosciences and Ion Torrent platforms excel, with the 454 GS Junior and Illumina MiSeq also notable in this regard. All platforms allow multiplexing of samples, but details of library preparation, experimental design and data analysis can constrain the options. The wide range of characteristics among available platforms provides opportunities both to conduct groundbreaking studies and to waste money on scales that were previously infeasible. Thus, careful thought about the desired characteristics of these systems is warranted before purchasing or using any of them. Updated information from this guide will be maintained at: http://dna.uga.edu/ and http://tomato.biol.trinity.edu/blog/.},
	language = {en},
	number = {5},
	urldate = {2017-03-29},
	journal = {Molecular Ecology Resources},
	author = {Glenn, Travis C.},
	month = sep,
	year = {2011},
	keywords = {2nd and 3rd generation sequencing, 454, Helicos, Illumina, Ion Torrent, Life Technologies, massively parallel sequencing, Pacific Biosystems, Roche, SOLiD},
	pages = {759--769},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/K9F65BMB/Glenn - 2011 - Field guide to next-generation DNA sequencers.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/8WJ96RMG/abstract.html:text/html}
}

@article{kimura_simple_1980,
	author = {Kimura, M.},
	title = {A simple method for estimating evolutionary rates of base substitutions through comparative studies of nucleotide sequences},
	journal = {Journal of Molecular Evolution},
	volume = {16},
	number = {2},
	pages = {111--120},
	month = dec,
	year = {1980},
	issn = {0022-2844},
	pmid = {7463489},
	language = {eng},
	keywords = {Animals, Base Sequence, Biological Evolution, DNA, Humans, Mathematics, Models, Biological, Mutation, Probability, Proteins, Species Specificity},
	abstract = {Some simple formulae were obtained which enable us to estimate evolutionary distances in terms of the number of nucleotide substitutions (and, also, the evolutionary rates when the divergence times are known). In comparing a pair of nucleotide sequences, we distinguish two types of differences; if homologous sites are occupied by different nucleotide bases but both are purines or both pyrimidines, the difference is called type I (or "transition" type), while, if one of the two is a purine and the other is a pyrimidine, the difference is called type II (or "transversion" type). Letting P and Q be respectively the fractions of nucleotide sites showing type I and type II differences between two sequences compared, then the evolutionary distance per site is K = -(1/2) ln [(1-2P-Q) square root of 1-2Q]. The evolutionary rate per year is then given by k = K/(2T), where T is the time since the divergence of the two sequences. If only the third codon positions are compared, the synonymous component of the evolutionary base substitutions per site is estimated by K'S = -(1/2) ln (1-2P-Q). Also, formulae for standard errors were obtained. Some examples were worked out using reported globin sequences to show that synonymous substitutions occur at much higher rates than amino acid-altering substitutions in evolution.}
}

@article{zhang_influenza_2017,
	author = {Zhang, Yun and Aevermann, Brian D. and Anderson, Tavis K. and Burke, David F. and Dauphin, Gwenaelle and Gu, Zhiping and He, Sherry and Kumar, Sanjeev and Larsen, Christopher N. and Lee, Alexandra J. and Li, Xiaomei and Macken, Catherine and Mahaffey, Colin and Pickett, Brett E. and Reardon, Brian and Smith, Thomas and Stewart, Lucy and Suloway, Christian and Sun, Guangyu and Tong, Lei and Vincent, Amy L. and Walters, Bryan and Zaremba, Sam and Zhao, Hongtao and Zhou, Liwei and Zmasek, Christian and Klem, Edward B. and Scheuermann, Richard H.},
	title = {Influenza {Research} {Database}: {An} integrated bioinformatics resource for influenza virus research},
	shorttitle = {Influenza {Research} {Database}},
	journal = {Nucleic Acids Research},
	volume = {45},
	number = {D1},
	pages = {D466--D474},
	month = jan,
	year = {2017},
	issn = {1362-4962},
	doi = {10.1093/nar/gkw857},
	pmid = {27679478},
	pmcid = {PMC5210613},
	language = {eng},
	abstract = {The Influenza Research Database (IRD) is a U.S. National Institute of Allergy and Infectious Diseases (NIAID)-sponsored Bioinformatics Resource Center dedicated to providing bioinformatics support for influenza virus research. IRD facilitates the research and development of vaccines, diagnostics and therapeutics against influenza virus by providing a comprehensive collection of influenza-related data integrated from various sources, a growing suite of analysis and visualization tools for data mining and hypothesis generation, personal workbench spaces for data storage and sharing, and active user community support. Here, we describe the recent improvements in IRD including the use of cloud and high performance computing resources, analysis and visualization of user-provided sequence data with associated metadata, predictions of novel variant proteins, annotations of phenotype-associated sequence markers and their predicted phenotypic effects, hemagglutinin (HA) clade classifications, an automated tool for HA subtype numbering conversion, linkouts to disease event data and the addition of host factor and antiviral drug components. All data and tools are freely available without restriction from the IRD website at https://www.fludb.org.},
	file = {gkw857.pdf:/Volumes/HOME/Zotero/storage/KTFXDJSV/gkw857.pdf:application/pdf}
}



@article{xiao_complete_2012,
	author = {Xiao, Sa and Paldurai, Anandan and Nayak, Baibaswata and Samuel, Arthur and Bharoto, Eny E. and Prajitno, Teguh Y. and Collins, Peter L. and Samal, Siba K.},
	title = {Complete {Genome} {Sequences} of {Newcastle} {Disease} {Virus} {Strains} {Circulating} in {Chicken} {Populations} of {Indonesia}},
	journal = {Journal of Virology},
	volume = {86},
	number = {10},
	pages = {5969--5970},
	month = may,
	year = {2012},
	issn = {0022-538X},
	doi = {10.1128/JVI.00546-12},
	pmid = {22532534},
	pmcid = {PMC3347286},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3347286/},
	urldate = {2017-03-15},
	abstract = {Eight highly virulent Newcastle disease virus (NDV) strains were isolated from vaccinated commercial chickens in Indonesia during outbreaks in 2009 and 2010. The complete genome sequences of two NDV strains and the sequences of the surface protein genes (F and HN) of six other strains were determined. Phylogenetic analysis classified them into two new subgroups of genotype VII in the class II cluster that were genetically distinct from vaccine strains. This is the first report of complete genome sequences of NDV strains isolated from chickens in Indonesia.},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/6K2QCPM8/Xiao et al. - 2012 - Complete Genome Sequences of Newcastle Disease Vir.pdf:application/pdf}
}


@article{scholle_viral_2013,
	title = {Viral {Substitution} {Rate} {Variation} {Can} {Arise} from the {Interplay} between {Within}-{Host} and {Epidemiological} {Dynamics}},
	volume = {182},
	issn = {0003-0147},
	url = {http://www.jstor.org/stable/10.1086/672000},
	doi = {10.1086/672000},
	abstract = {The evolutionary rates of RNA viruses can differ from one another by several orders of magnitude. Much of this variation has been explained by differences in viral mutation rates and selective environments. However, substitution rates also vary considerably across viral populations belonging to the same species. In particular, viral lineages from epidemic regions tend to have higher substitution rates than those from endemic regions, and lineages from populations with higher contact rates tend to have higher substitution rates than those from populations with lower contact rates. We address the mechanism behind these patterns by using a nested modeling approach, whereby we integrate within-host viral replication dynamics with a population-level epidemiological model. Through numerical simulations and analytical approximations, we show that variation in viral substitution rates over the course of an infection, coupled with differences in age of infection of transmitting hosts under different epidemiological scenarios, can explain these evolutionary patterns. We further derive analytical estimates of expected substitution rate differences under epidemic versus endemic epidemiological conditions. By comparing these estimates to empirical data for four viral species, we show that these factors are sufficient to explain observed variation in substitution rates in three of four cases. This work shows that even in neutrally evolving viral populations, epidemiological dynamics can alter substitution rates via the interplay between within-host replication dynamics and population-level disease dynamics.},
	number = {4},
	urldate = {2017-06-07},
	journal = {The American Naturalist},
	author = {Scholle, Stacy O. and Ypma, Rolf J. F. and Lloyd, Alun L. and Koelle, Katia},
	year = {2013},
	pages = {494--513},
	annote = {Handling editors (listed as authors in the JSTOR export): Wolf M. Mooij (Associate Editor) and Troy Day (Editor).}
}


@article{kratsch_determination_2016,
	title = {Determination of antigenicity-altering patches on the major surface protein of human influenza {A/H3N2} viruses},
	volume = {2},
	issn = {2057-1577},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4989879/},
	doi = {10.1093/ve/vev025},
	abstract = {Human influenza viruses are rapidly evolving RNA viruses that cause short-term respiratory infections with substantial morbidity and mortality in annual epidemics. Uncovering the general principles of viral coevolution with human hosts is important for pathogen surveillance and vaccine design. Protein regions are an appropriate model for the interactions between two macromolecules, but the currently used epitope definition for the major antigen of influenza viruses, namely hemagglutinin, is very broad. Here, we combined genetic, evolutionary, antigenic, and structural information to determine the most relevant regions of the hemagglutinin of human influenza A/H3N2 viruses for interaction with human immunoglobulins. We estimated the antigenic weights of amino acid changes at individual sites from hemagglutination inhibition data using antigenic tree inference followed by spatial clustering of antigenicity-altering protein sites on the protein structure. This approach determined six relevant areas (patches) for antigenic variation that had a key role in the past antigenic evolution of the viruses. Previous transitions between successive predominating antigenic types of H3N2 viruses always included amino acid changes in either the first or second antigenic patch. Interestingly, there was only partial overlap between the antigenic patches and the patches under strong positive selection. Therefore, besides alterations of antigenicity, other interactions with the host may shape the evolution of human influenza A/H3N2 viruses.},
	number = {1},
	pages = {vev025},
	urldate = {2017-06-07},
	journal = {Virus Evolution},
	author = {Kratsch, Christina and Klingen, Thorsten R. and Mümken, Linda and Steinbrück, Lars and McHardy, Alice C.},
	month = feb,
	year = {2016},
	pmid = {27774294},
	pmcid = {PMC4989879},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/I8FNGIRW/Kratsch et al. - 2016 - Determination of antigenicity-altering patches on .pdf:application/pdf}
}

@article{lam_phylodynamics_2012,
	title = {Phylodynamics of {H5N1} avian influenza virus in {Indonesia}},
	volume = {21},
	issn = {1365-294X},
	doi = {10.1111/j.1365-294X.2012.05577.x},
	abstract = {Understanding how pathogens invade and become established in novel host populations is central to the ecology and evolution of infectious disease. Influenza viruses provide unique opportunities to study these processes in nature because of their rapid evolution, extensive surveillance, large data sets and propensity to jump species boundaries. H5N1 highly pathogenic avian influenza virus (HPAIV) is a major animal pathogen and public health threat. The virus is of particular importance in Indonesia, causing severe outbreaks among poultry and sporadic human infections since 2003. However, little is known about how H5N1 HPAIV emerged and established in Indonesia. To address these questions, we analysed Indonesian H5N1 HPAIV gene sequences isolated during 2003-2007. We find that the virus originated from a single introduction into East Java between November 2002 and October 2003. This invasion was characterized by an initially rapid burst of viral genetic diversity followed by a steady rate of lineage replacement and the maintenance of genetic diversity. Several antigenic sites in the haemagglutinin gene were subject to positive selection during the early phase, suggesting that host-immune-driven selection played a role in host adaptation and expansion. Phylogeographic analyses show that after the initial invasion of H5N1, genetic variants moved both eastwards and westwards across Java, possibly involving long-distance transportation by humans. The phylodynamics we uncover share similarities with other recently studied viral invasions, thereby shedding light on the ecological and evolutionary processes that determine disease emergence in a new geographical region.},
	language = {eng},
	number = {12},
	journal = {Molecular Ecology},
	author = {Lam, Tommy Tsan-Yuk and Hon, Chung-Chau and Lemey, Philippe and Pybus, Oliver G. and Shi, Mang and Tun, Hein Min and Li, Jun and Jiang, Jingwei and Holmes, Edward C. and Leung, Frederick Chi-Ching},
	month = jun,
	year = {2012},
	pmid = {22574738},
	keywords = {Animals, Biological Evolution, Disease Outbreaks, Genetic Variation, Hemagglutinin Glycoproteins, Influenza Virus, Humans, Indonesia, Influenza A Virus, H5N1 Subtype, Influenza, Human, Influenza in Birds, Phylogeny, Phylogeography, Poultry, Poultry Diseases, Sequence Alignment, Sequence Analysis, RNA, Zoonoses},
	pages = {3062--3077}
}


@article{kunin_pyrotagger_2010,
	title = {{PyroTagger}: {A} fast, accurate pipeline for analysis of {rRNA} amplicon pyrosequence data},
	shorttitle = {{PyroTagger}},
	url = {http://www.theopenjournal.org/toj_articles/1},
	urldate = {2017-03-24},
	journal = {The Open Journal},
	author = {Kunin, Victor and Hugenholtz, Philip},
	year = {2010},
	pages = {1--8}
}

@article{callahan_dada2_2016,
	title = {{DADA}2: {High}-resolution sample inference from {Illumina} amplicon data},
	volume = {13},
	copyright = {© 2016 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
	issn = {1548-7091},
	shorttitle = {{DADA}2},
	url = {http://www.nature.com/nmeth/journal/v13/n7/full/nmeth.3869.html},
	doi = {10.1038/nmeth.3869},
	abstract = {We present the open-source software package DADA2 for modeling and correcting Illumina-sequenced amplicon errors (https://github.com/benjjneb/dada2). DADA2 infers sample sequences exactly and resolves differences of as little as 1 nucleotide. In several mock communities, DADA2 identified more real variants and output fewer spurious sequences than other methods. We applied DADA2 to vaginal samples from a cohort of pregnant women, revealing a diversity of previously undetected Lactobacillus crispatus variants.},
	language = {en},
	number = {7},
	urldate = {2017-03-24},
	journal = {Nature Methods},
	author = {Callahan, Benjamin J. and McMurdie, Paul J. and Rosen, Michael J. and Han, Andrew W. and Johnson, Amy Jo A. and Holmes, Susan P.},
	month = jul,
	year = {2016},
	keywords = {Metagenomics, Software, Statistical methods},
	pages = {581--583},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/CR9Z8XUQ/Callahan et al. - 2016 - DADA2 High-resolution sample inference from Illum.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/8A8ETXHW/nmeth.3869.html:text/html}
}

@article{schloss_introducing_2009,
	author = {Schloss, Patrick D. and Westcott, Sarah L. and Ryabin, Thomas and Hall, Justine R. and Hartmann, Martin and Hollister, Emily B. and Lesniewski, Ryan A. and Oakley, Brian B. and Parks, Donovan H. and Robinson, Courtney J. and Sahl, Jason W. and Stres, Blaz and Thallinger, Gerhard G. and Van Horn, David J. and Weber, Carolyn F.},
	title = {Introducing mothur: open-source, platform-independent, community-supported software for describing and comparing microbial communities},
	shorttitle = {Introducing mothur},
	journal = {Applied and Environmental Microbiology},
	volume = {75},
	number = {23},
	pages = {7537--7541},
	month = dec,
	year = {2009},
	issn = {1098-5336},
	doi = {10.1128/AEM.01541-09},
	pmid = {19801464},
	pmcid = {PMC2786419},
	language = {eng},
	keywords = {Biodiversity, Computational Biology, Environmental Microbiology, Metagenomics, Sequence Analysis, DNA, Software},
	abstract = {mothur aims to be a comprehensive software package that allows users to use a single piece of software to analyze community sequence data. It builds upon previous tools to provide a flexible and powerful software package for analyzing sequencing data. As a case study, we used mothur to trim, screen, and align sequences; calculate distances; assign sequences to operational taxonomic units; and describe the alpha and beta diversity of eight marine samples previously characterized by pyrosequencing of 16S rRNA gene fragments. This analysis of more than 222,000 sequences was completed in less than 2 h with a laptop computer.}
}

@article{kuczynski_using_2011,
	title = {Using {QIIME} to analyze {16S} {rRNA} gene sequences from {Microbial} {Communities}},
	volume = {Chapter 10},
	issn = {1934-3396},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3249058/},
	doi = {10.1002/0471250953.bi1007s36},
	abstract = {QIIME (canonically pronounced ‘chime’) is software that performs microbial community analysis. It is an acronym for Quantitative Insights Into Microbial Ecology, and has been used to analyze and interpret nucleic acid sequence data from fungal, viral, bacterial, and archaeal communities. The following protocols describe how to install QIIME on a single computer, and use it to analyze microbial 16S sequence data from 9 distinct microbial communities.},
	urldate = {2017-03-24},
	journal = {Current Protocols in Bioinformatics},
	author = {Kuczynski, Justin and Stombaugh, Jesse and Walters, William Anton and González, Antonio and Caporaso, J. Gregory and Knight, Rob},
	month = dec,
	year = {2011},
	pmid = {22161565},
	pmcid = {PMC3249058},
	pages = {Unit 10.7},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/VPKB58DP/Kuczynski et al. - 2011 - Using QIIME to analyze 16S rRNA gene sequences fro.pdf:application/pdf}
}

@article{jombart_outbreaktools_2014,
	author = {Jombart, Thibaut and Aanensen, David M. and Baguelin, Marc and Birrell, Paul and Cauchemez, Simon and Camacho, Anton and Colijn, Caroline and Collins, Caitlin and Cori, Anne and Didelot, Xavier and Fraser, Christophe and Frost, Simon and Hens, Niel and Hugues, Joseph and Höhle, Michael and Opatowski, Lulla and Rambaut, Andrew and Ratmann, Oliver and Soubeyrand, Samuel and Suchard, Marc A. and Wallinga, Jacco and Ypma, Rolf and Ferguson, Neil},
	title = {{OutbreakTools}: a new platform for disease outbreak analysis using the {R} software},
	shorttitle = {{OutbreakTools}},
	journal = {Epidemics},
	volume = {7},
	pages = {28--34},
	month = jun,
	year = {2014},
	issn = {1878-0067},
	doi = {10.1016/j.epidem.2014.04.003},
	pmid = {24928667},
	pmcid = {PMC4058532},
	language = {eng},
	keywords = {Computational Biology, Disease Outbreaks, Epidemiologic Methods, Health Information Management, Humans, Public Health Informatics, Software},
	abstract = {The investigation of infectious disease outbreaks relies on the analysis of increasingly complex and diverse data, which offer new prospects for gaining insights into disease transmission processes and informing public health policies. However, the potential of such data can only be harnessed using a number of different, complementary approaches and tools, and a unified platform for the analysis of disease outbreaks is still lacking. In this paper, we present the new R package OutbreakTools, which aims to provide a basis for outbreak data management and analysis in R. OutbreakTools is developed by a community of epidemiologists, statisticians, modellers and bioinformaticians, and implements classes and methods for storing, handling and visualizing outbreak data. It includes real and simulated outbreak datasets. Together with a number of tools for infectious disease epidemiology recently made available in R, OutbreakTools contributes to the emergence of a new, free and open-source platform for the analysis of disease outbreaks.}
}

@article{hach_mrsfast:_2010,
	title = {{mrsFAST}: a cache-oblivious algorithm for short-read mapping},
	volume = {7},
	rights = {© 2010 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
	issn = {1548-7091},
	url = {http://www.nature.com/nmeth/journal/v7/n8/full/nmeth0810-576.html},
	doi = {10.1038/nmeth0810-576},
	shorttitle = {{mrsFAST}},
	pages = {576--577},
	number = {8},
	journaltitle = {Nature Methods},
	shortjournal = {Nat Meth},
	author = {Hach, Faraz and Hormozdiari, Fereydoun and Alkan, Can and Hormozdiari, Farhad and Birol, Inanc and Eichler, Evan E. and Sahinalp, S. Cenk},
	urldate = {2017-04-13},
	date = {2010-08},
	year = {2010},
	langid = {english},
	keywords = {bioinformatics, Genetic mapping},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/KFCFUUZ4/Hach et al. - 2010 - mrsFAST a cache-oblivious algorithm for short-rea.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/4UUHXRPS/nmeth0810-576.html:text/html}
}

@article{trapnell_tophat:_2009,
	title = {{TopHat}: discovering splice junctions with {RNA-Seq}},
	volume = {25},
	issn = {1367-4803},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2672628/},
	doi = {10.1093/bioinformatics/btp120},
	shorttitle = {{TopHat}},
	abstract = {Motivation: A new protocol for sequencing the messenger {RNA} in a cell, known as {RNA}-Seq, generates millions of short sequence fragments in a single run. These fragments, or ‘reads’, can be used to measure levels of gene expression and to identify novel splice variants of genes. However, current software for aligning {RNA}-Seq data to a genome relies on known splice junctions and cannot identify novel ones. {TopHat} is an efficient read-mapping algorithm designed to align reads from an {RNA}-Seq experiment to a reference genome without relying on known splice sites. Results: We mapped the {RNA}-Seq reads from a recent mammalian {RNA}-Seq experiment and recovered more than 72\% of the splice junctions reported by the annotation-based software from that study, along with nearly 20 000 previously unreported junctions. The {TopHat} pipeline is much faster than previous systems, mapping nearly 2.2 million reads per {CPU} hour, which is sufficient to process an entire {RNA}-Seq experiment in less than a day on a standard desktop computer. We describe several challenges unique to ab initio splice site discovery from {RNA}-Seq reads that will require further algorithm development. Availability: {TopHat} is free, open-source software available from http://tophat.cbcb.umd.edu. Contact: cole@cs.umd.edu. Supplementary information: Supplementary data are available at Bioinformatics online.},
	pages = {1105--1111},
	number = {9},
	journaltitle = {Bioinformatics},
	shortjournal = {Bioinformatics},
	author = {Trapnell, Cole and Pachter, Lior and Salzberg, Steven L.},
	urldate = {2017-04-13},
	date = {2009-05-01},
	year = {2009},
	pmid = {19289445},
	pmcid = {PMC2672628},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/AFN8QC9P/Trapnell et al. - 2009 - TopHat discovering splice junctions with RNA-Seq.pdf:application/pdf}
}

@article{lunter_stampy:_2011,
	title = {Stampy: a statistical algorithm for sensitive and fast mapping of {Illumina} sequence reads},
	volume = {21},
	issn = {1549-5469},
	doi = {10.1101/gr.111120.110},
	shorttitle = {Stampy},
	abstract = {High-volume sequencing of {DNA} and {RNA} is now within reach of any research laboratory and is quickly becoming established as a key research tool. In many workflows, each of the short sequences ("reads") resulting from a sequencing run are first "mapped" (aligned) to a reference sequence to infer the read from which the genomic location derived, a challenging task because of the high data volumes and often large genomes. Existing read mapping software excel in either speed (e.g., {BWA}, Bowtie, {ELAND}) or sensitivity (e.g., Novoalign), but not in both. In addition, performance often deteriorates in the presence of sequence variation, particularly so for short insertions and deletions (indels). Here, we present a read mapper, Stampy, which uses a hybrid mapping algorithm and a detailed statistical model to achieve both speed and sensitivity, particularly when reads include sequence variation. This results in a higher useable sequence yield and improved accuracy compared to that of existing software.},
	pages = {936--939},
	number = {6},
	journaltitle = {Genome Research},
	shortjournal = {Genome Res.},
	author = {Lunter, Gerton and Goodson, Martin},
	date = {2011-06},
	year = {2011},
	pmid = {20980556},
	pmcid = {PMC3106326},
	keywords = {Algorithms, Models, Statistical, Sensitivity and Specificity, Sequence Alignment, Sequence Analysis, {DNA}, Software}
}

@article{li_soap2:_2009,
	title = {{SOAP}2: an improved ultrafast tool for short read alignment},
	volume = {25},
	issn = {1367-4803},
	url = {https://academic.oup.com/bioinformatics/article/25/15/1966/212427/SOAP2-an-improved-ultrafast-tool-for-short-read},
	doi = {10.1093/bioinformatics/btp336},
	shorttitle = {{SOAP}2},
	pages = {1966--1967},
	number = {15},
	journaltitle = {Bioinformatics},
	shortjournal = {Bioinformatics},
	author = {Li, Ruiqiang and Yu, Chang and Li, Yingrui and Lam, Tak-Wah and Yiu, Siu-Ming and Kristiansen, Karsten and Wang, Jun},
	urldate = {2017-04-13},
	date = {2009-08-01},
	year = {2009},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/7AGQMW2R/Li et al. - 2009 - SOAP2 an improved ultrafast tool for short read a.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/632MRR6F/SOAP2-an-improved-ultrafast-tool-for-short-read.html:text/html}
}

@article{li_mapping_2008,
	author = {Li, Heng and Ruan, Jue and Durbin, Richard},
	title = {Mapping short {DNA} sequencing reads and calling variants using mapping quality scores},
	journaltitle = {Genome Research},
	shortjournal = {Genome Res.},
	volume = {18},
	number = {11},
	pages = {1851--1858},
	date = {2008-11-01},
	year = {2008},
	issn = {1088-9051, 1549-5469},
	doi = {10.1101/gr.078212.108},
	pmid = {18714091},
	url = {http://genome.cshlp.org/content/18/11/1851},
	urldate = {2017-04-13},
	langid = {english},
	abstract = {New sequencing technologies promise a new era in the use of {DNA} sequence. However, some of these technologies produce very short reads, typically of a few tens of base pairs, and to use these reads effectively requires new algorithms and software. In particular, there is a major issue in efficiently aligning short reads to a reference genome and handling ambiguity or lack of accuracy in this alignment. Here we introduce the concept of mapping quality, a measure of the confidence that a read actually comes from the position it is aligned to by the mapping algorithm. We describe the software {MAQ} that can build assemblies by mapping shotgun short reads to a reference genome, using quality scores to derive genotype calls of the consensus sequence of a diploid genome, e.g., from a human sample. {MAQ} makes full use of mate-pair information and estimates the error probability of each read alignment. Error probabilities are also derived for the final genotype calls, using a Bayesian statistical model that incorporates the mapping qualities, error probabilities from the raw sequence quality scores, sampling of the two haplotypes, and an empirical model for correlated errors at a site. Both read mapping and genotype calling are evaluated on simulated data and real data. {MAQ} is accurate, efficient, versatile, and user-friendly. It is freely available at http://maq.sourceforge.net.},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/M58TGTQD/Li et al. - 2008 - Mapping short DNA sequencing reads and calling var.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/R9V9K9FQ/1851.html:text/html}
}


@article{simpson_abyss:_2009,
	author = {Simpson, Jared T. and Wong, Kim and Jackman, Shaun D. and Schein, Jacqueline E. and Jones, Steven J. M. and Birol, İnanç},
	title = {{ABySS}: A parallel assembler for short read sequence data},
	shorttitle = {{ABySS}},
	journaltitle = {Genome Research},
	shortjournal = {Genome Res.},
	volume = {19},
	number = {6},
	pages = {1117--1123},
	date = {2009-06-01},
	year = {2009},
	issn = {1088-9051, 1549-5469},
	doi = {10.1101/gr.089532.108},
	pmid = {19251739},
	url = {http://genome.cshlp.org/content/19/6/1117},
	urldate = {2014-11-23},
	langid = {english},
	abstract = {Widespread adoption of massively parallel deoxyribonucleic acid ({DNA}) sequencing instruments has prompted the recent development of de novo short read assembly algorithms. A common shortcoming of the available tools is their inability to efficiently assemble vast amounts of data generated from large-scale sequencing projects, such as the sequencing of individual human genomes to catalog natural genetic variation. To address this limitation, we developed {ABySS} (Assembly By Short Sequences), a parallelized sequence assembler. As a demonstration of the capability of our software, we assembled 3.5 billion paired-end reads from the genome of an African male publicly released by Illumina, Inc. Approximately 2.76 million contigs ≥100 base pairs (bp) in length were created with an N50 size of 1499 bp, representing 68\% of the reference human genome. Analysis of these contigs identified polymorphic and novel sequences not present in the human reference assembly, which were validated by alignment to alternate human assemblies and to other primate genomes.},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/SVDWG8WJ/Simpson et al. - 2009 - ABySS A parallel assembler for short read sequenc.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/S7I2VKSN/1117.html:text/html}
}


@article{langmead_bowtie_2009,
	title = {Ultrafast and memory-efficient alignment of short {DNA} sequences to the human genome},
	volume = {10},
	copyright = {2009 Langmead et al.; licensee BioMed Central Ltd.},
	issn = {1474-760X},
	url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb-2009-10-3-r25},
	doi = {10.1186/gb-2009-10-3-r25},
	abstract = {Bowtie is an ultrafast, memory-efficient alignment program for aligning short DNA sequence reads to large genomes. For the human genome, Burrows-Wheeler indexing allows Bowtie to align more than 25 million reads per CPU hour with a memory footprint of approximately 1.3 gigabytes. Bowtie extends previous Burrows-Wheeler techniques with a novel quality-aware backtracking algorithm that permits mismatches. Multiple processor cores can be used simultaneously to achieve even greater alignment speeds. Bowtie is open source http://bowtie.cbcb.umd.edu.},
	language = {en},
	number = {3},
	urldate = {2017-04-03},
	journal = {Genome Biology},
	author = {Langmead, Ben and Trapnell, Cole and Pop, Mihai and Salzberg, Steven L.},
	month = mar,
	year = {2009},
	pages = {R25},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/7ZI4PSSH/Langmead et al. - 2009 - Ultrafast and memory-efficient alignment of short .pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/8IA6SEI2/gb-2009-10-3-r25.html:text/html}
}


@article{han_co-circulation_2014,
	title = {Co-circulation of three {HA} and two {NA} subtypes of avian influenza viruses in {Huzhou}, {China}, {April} 2013: implication for the origin of the novel {H7N9} virus},
	issn = {0022-538X, 1098-5514},
	shorttitle = {Co-circulation of three {HA} and two {NA} subtypes of avian influenza viruses in {Huzhou}, {China}, {April} 2013},
	url = {http://jvi.asm.org/content/early/2014/03/06/JVI.03319-13},
	doi = {10.1128/JVI.03319-13},
	abstract = {We detected three avian influenza HA subtypes (H7, H9 and H5) and two NA subtypes (N9 and N2), as well as H7N9-related reassortant intermediates H9N9, co-circulating among poultry in Huzhou, China, during April 2013. The co-circulation not only reveals that Huzhou is one of the geographic origins of the novel H7N9 virus, but also poses a potential threat to humans in the future.},
	language = {en},
	urldate = {2017-04-11},
	journal = {Journal of Virology},
	author = {Han, Jiankang and Wang, Lili and Liu, Jia and Jin, Meihua and Hao, Fangyuan and Zhang, Peng and Zhang, Zhao and Wen, Dong and Wu, Xiaofang and Liu, Guangtao and Ji, Lei and Xu, Deshun and Zhou, Dongming and Leng, Qibin and Lan, Ke and Zhang, Chiyu},
	month = mar,
	year = {2014},
	pmid = {24623437},
	pages = {JVI.03319-13},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/S7JV45CE/Han et al. - 2014 - Co-circulation of three HA and two NA subtypes of .pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/P2UK6V2V/JVI.03319-13.html:text/html}
}

@article{zhang_co-infection_2015,
	title = {Co-infection with {Avian} ({H7N9}) and {Pandemic} ({H1N1}) 2009 {Influenza} {Viruses}, {China}},
	volume = {21},
	issn = {1080-6040},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4378472/},
	doi = {10.3201/eid2104.141560},
	number = {4},
	urldate = {2017-04-11},
	journal = {Emerging Infectious Diseases},
	author = {Zhang, Wanju and Zhu, Dongyi and Tian, Di and Xu, Lei and Zhu, Zhaokui and Teng, Zheng and He, Jing and Shan, Shan and Liu, Yi and Wang, Wei and Yuan, Zhenghong and Ren, Tao and Hu, Yunwen},
	month = apr,
	year = {2015},
	pmid = {25811107},
	pmcid = {PMC4378472},
	pages = {715--718},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/XHEXFRIB/Zhang et al. - 2015 - Co-infection with Avian (H7N9) and Pandemic (H1N1).pdf:application/pdf}
}

@article{viboud_multinational_2005,
	title = {Multinational {Impact} of the 1968 {Hong} {Kong} {Influenza} {Pandemic}: {Evidence} for a {Smoldering} {Pandemic}},
	volume = {192},
	issn = {0022-1899},
	shorttitle = {Multinational {Impact} of the 1968 {Hong} {Kong} {Influenza} {Pandemic}},
	url = {https://academic.oup.com/jid/article/192/2/233/856805/Multinational-Impact-of-the-1968-Hong-Kong},
	doi = {10.1086/431150},
	number = {2},
	urldate = {2017-04-11},
	journal = {The Journal of Infectious Diseases},
	author = {Viboud, Cécile and Grais, Rebecca F. and Lafont, Bernard A. P. and Miller, Mark A. and Simonsen, Lone},
	month = jul,
	year = {2005},
	pages = {233--248},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/3V3BC459/Viboud et al. - 2005 - Multinational Impact of the 1968 Hong Kong Influen.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/NRPZQEZI/Multinational-Impact-of-the-1968-Hong-Kong.html:text/html}
}

@article{nikolenko_bayeshammer_2013,
	title = {{BayesHammer}: {Bayesian} clustering for error correction in single-cell sequencing},
	volume = {14},
	issn = {1471-2164},
	shorttitle = {{BayesHammer}},
	url = {http://dx.doi.org/10.1186/1471-2164-14-S1-S7},
	doi = {10.1186/1471-2164-14-S1-S7},
	abstract = {Error correction of sequenced reads remains a difficult task, especially in single-cell sequencing projects with extremely non-uniform coverage. While existing error correction tools designed for standard (multi-cell) sequencing data usually come up short in single-cell sequencing projects, algorithms actually used for single-cell error correction have been so far very simplistic.},
	number = {Suppl 1},
	urldate = {2017-04-11},
	journal = {BMC Genomics},
	author = {Nikolenko, Sergey I. and Korobeynikov, Anton I. and Alekseyev, Max A.},
	year = {2013},
	pages = {S7},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/UK4Q5J6I/Nikolenko et al. - 2013 - BayesHammer Bayesian clustering for error correct.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/3W79ZK46/1471-2164-14-S1-S7.html:text/html}
}

@article{li_bwa_2010,
	title = {Fast and accurate long-read alignment with {Burrows}-{Wheeler} transform},
	volume = {26},
	issn = {1367-4811},
	doi = {10.1093/bioinformatics/btp698},
	abstract = {MOTIVATION: Many programs for aligning short sequencing reads to a reference genome have been developed in the last 2 years. Most of them are very efficient for short reads but inefficient or not applicable for reads {\textgreater}200 bp because the algorithms are heavily and specifically tuned for short queries with low sequencing error rate. However, some sequencing platforms already produce longer reads and others are expected to become available soon. For longer reads, hashing-based software such as BLAT and SSAHA2 remain the only choices. Nonetheless, these methods are substantially slower than short-read aligners in terms of aligned bases per unit time.
RESULTS: We designed and implemented a new algorithm, Burrows-Wheeler Aligner's Smith-Waterman Alignment (BWA-SW), to align long sequences up to 1 Mb against a large sequence database (e.g. the human genome) with a few gigabytes of memory. The algorithm is as accurate as SSAHA2, more accurate than BLAT, and is several to tens of times faster than both.
AVAILABILITY: http://bio-bwa.sourceforge.net},
	language = {eng},
	number = {5},
	journal = {Bioinformatics},
	author = {Li, Heng and Durbin, Richard},
	month = mar,
	year = {2010},
	pmid = {20080505},
	pmcid = {PMC2828108},
	keywords = {Algorithms, Base Sequence, Genome, Human, Genomics, Humans, Sequence Alignment, Sequence Analysis, DNA},
	pages = {589--595}
}

@article{edgar_muscle_2004,
	title = {{MUSCLE}: multiple sequence alignment with high accuracy and high throughput},
	volume = {32},
	issn = {0305-1048},
	shorttitle = {{MUSCLE}},
	url = {https://academic.oup.com/nar/article/32/5/1792/2380623/MUSCLE-multiple-sequence-alignment-with-high},
	doi = {10.1093/nar/gkh340},
	number = {5},
	urldate = {2017-03-23},
	journal = {Nucleic Acids Research},
	author = {Edgar, Robert C.},
	month = mar,
	year = {2004},
	pages = {1792--1797},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/I7UAZRA6/Edgar - 2004 - MUSCLE multiple sequence alignment with high accu.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/8IC6SFZ7/gkh340.html:text/html}
}

% McMurdie and Holmes (2013): phyloseq R package for microbiome census data.
% Journal name is spelled the same way as the other PLOS ONE entry in this file.
@article{mcmurdie_phyloseq_2013,
	title = {phyloseq: an {R} package for reproducible interactive analysis and graphics of microbiome census data},
	volume = {8},
	issn = {1932-6203},
	shorttitle = {phyloseq},
	doi = {10.1371/journal.pone.0061217},
	abstract = {BACKGROUND: The analysis of microbial communities through DNA sequencing brings many challenges: the integration of different types of data with methods from ecology, genetics, phylogenetics, multivariate statistics, visualization and testing. With the increased breadth of experimental designs now being pursued, project-specific statistical analyses are often needed, and these analyses are often difficult (or impossible) for peer researchers to independently reproduce. The vast majority of the requisite tools for performing these analyses reproducibly are already implemented in R and its extensions (packages), but with limited support for high throughput microbiome census data.
RESULTS: Here we describe a software project, phyloseq, dedicated to the object-oriented representation and analysis of microbiome census data in R. It supports importing data from a variety of common formats, as well as many analysis techniques. These include calibration, filtering, subsetting, agglomeration, multi-table comparisons, diversity analysis, parallelized Fast UniFrac, ordination methods, and production of publication-quality graphics; all in a manner that is easy to document, share, and modify. We show how to apply functions from other R packages to phyloseq-represented data, illustrating the availability of a large number of open source analysis techniques. We discuss the use of phyloseq with tools for reproducible research, a practice common in other fields but still rare in the analysis of highly parallel microbiome census data. We have made available all of the materials necessary to completely reproduce the analysis and figures included in this article, an example of best practices for reproducible research.
CONCLUSIONS: The phyloseq project for R is a new open-source software package, freely available on the web from both GitHub and Bioconductor.},
	language = {eng},
	number = {4},
	journal = {PLOS ONE},
	author = {McMurdie, Paul J. and Holmes, Susan},
	year = {2013},
	pmid = {23630581},
	pmcid = {PMC3632530},
	keywords = {Data Interpretation, Statistical, Humans, Metagenome, Multivariate Analysis, Phylogeny, Principal Component Analysis, Sequence Analysis, DNA, Software},
	pages = {e61217}
}


% Revell (2012): phytools R package, Methods in Ecology and Evolution 3(2).
% The url points at the publisher host directly (no institutional proxy).
@article{revell_phytools_2012,
	title = {phytools: an {R} package for phylogenetic comparative biology (and other things)},
	volume = {3},
	issn = {2041-210X},
	shorttitle = {phytools},
	url = {http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00169.x/abstract},
	doi = {10.1111/j.2041-210X.2011.00169.x},
	abstract = {1. Here, I present a new, multifunctional phylogenetics package, phytools, for the R statistical computing environment. 2. The focus of the package is on methods for phylogenetic comparative biology; however, it also includes tools for tree inference, phylogeny input/output, plotting, manipulation and several other tasks. 3. I describe and tabulate the major methods implemented in phytools, and in addition provide some demonstration of its use in the form of two illustrative examples. 4. Finally, I conclude by briefly describing an active web-log that I use to document present and future developments for phytools. I also note other web resources for phylogenetics in the R computational environment.},
	language = {en},
	number = {2},
	urldate = {2017-03-06},
	journal = {Methods in Ecology and Evolution},
	author = {Revell, Liam J.},
	month = apr,
	year = {2012},
	keywords = {blogging, Computational Biology, Evolution, Phylogeny, statistics},
	pages = {217--223},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/ZMV79M8E/Revell - 2012 - phytools an R package for phylogenetic comparativ.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/34CTPMM8/abstract.html:text/html}
}


% Schliep (2011): phangorn, phylogenetic analysis in R, Bioinformatics 27(4).
@article{schliep_phangorn_2011,
	title = {phangorn: phylogenetic analysis in {R}},
	volume = {27},
	shorttitle = {phangorn},
	url = {http://bioinformatics.oxfordjournals.org/content/27/4/592.abstract},
	doi = {10.1093/bioinformatics/btq706},
	abstract = {Summary: phangorn is a package for phylogenetic reconstruction and analysis in the R language. Previously it was only possible to estimate phylogenetic trees with distance methods in R. phangorn, now offers the possibility of reconstructing phylogenies with distance based methods, maximum parsimony or maximum likelihood (ML) and performing Hadamard conjugation. Extending the general ML framework, this package provides the possibility of estimating mixture and partition models. Furthermore, phangorn offers several functions for comparing trees, phylogenetic models or splits, simulating character data and performing congruence analyses. Availability: phangorn can be obtained through the CRAN homepage http://cran.r-project.org/web/packages/phangorn/index.html. phangorn is licensed under GPL 2. Contact: klaus.kschliep@snv.jussieu.fr Supplementary information: Supplementary data are available at Bioinformatics online.},
	number = {4},
	urldate = {2011-03-05},
	journal = {Bioinformatics},
	author = {Schliep, Klaus Peter},
	month = feb,
	year = {2011},
	pages = {592--593},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/K7S74NST/Schliep - 2011 - phangorn phylogenetic analysis in R.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/VEW599BE/592.html:text/html}
}

% Paradis, Claude and Strimmer (2004): APE package for R, Bioinformatics 20(2).
@article{paradis_ape_2004,
	title = {{APE}: {Analyses} of {Phylogenetics} and {Evolution} in {R} language},
	volume = {20},
	shorttitle = {{APE}},
	url = {http://bioinformatics.oxfordjournals.org/content/20/2/289.abstract},
	doi = {10.1093/bioinformatics/btg412},
	abstract = {Summary: Analysis of Phylogenetics and Evolution (APE) is a package written in the R language for use in molecular evolution and phylogenetics. APE provides both utility functions for reading and writing data and manipulating phylogenetic trees, as well as several advanced methods for phylogenetic and evolutionary analysis (e.g. comparative and population genetic methods). APE takes advantage of the many R functions for statistics and graphics, and also provides a flexible framework for developing and implementing further statistical methods for the analysis of evolutionary processes. Availability: The program is free and available from the official R package archive at http://cran.r-project.org/src/contrib/PACKAGES.html\#ape. APE is licensed under the GNU General Public License.},
	number = {2},
	urldate = {2011-03-04},
	journal = {Bioinformatics},
	author = {Paradis, Emmanuel and Claude, Julien and Strimmer, Korbinian},
	month = jan,
	year = {2004},
	pages = {289--290},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/ZTF3MA9A/Paradis et al. - 2004 - APE Analyses of Phylogenetics and Evolution in R .pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/9A2BT63C/289.html:text/html}
}

% Wilkinson et al. (2005): The Grammar of Graphics, second edition (Springer, New York).
% The edition is stored as a bare number; the bibliography style supplies the
% ordinal suffix and the word "edition".
@book{wilkinson_grammar_2005,
	address = {New York},
	edition = {2},
	title = {The {Grammar} of {Graphics}},
	isbn = {978-0-387-24544-7},
	abstract = {Presents a unique foundation for producing almost every quantitative graphic found in scientific journals, newspapers, statistical packages, and data visualization systems. The new edition features six new chapters and has undergone substantial revision. The first edition has sold more than 2200 copies. Four color throughout.},
	language = {English},
	publisher = {Springer},
	author = {Wilkinson, Leland and Wills, D. and Rope, D. and Norton, A. and Dubbs, R.},
	month = jul,
	year = {2005}
}

% Wickham (2016): the ggplot2 book (Springer).
@book{wickham_ggplot2_2016,
	author = {Wickham, Hadley},
	title = {ggplot2: Elegant Graphics for Data Analysis},
	publisher = {Springer},
	year = {2016},
	month = jun,
	isbn = {978-3-319-24277-4},
	url = {http://ggplot2.org},
	urldate = {2017-01-05},
	language = {English}
}


% Barton et al. (2016): broad activation of latent HIV-1 in vivo, Nature Communications 7.
% Uses biblatex field names (journaltitle, shortjournal, date).
@article{barton_broad_2016,
	author = {Barton, Kirston and Hiener, Bonnie and Winckelmann, Anni and Rasmussen, Thomas Aagaard and Shao, Wei and Byth, Karen and Lanfear, Robert and Solomon, Ajantha and {McMahon}, James and Harrington, Sean and Buzon, Maria and Lichterfeld, Mathias and Denton, Paul W. and Olesen, Rikke and Østergaard, Lars and Tolstrup, Martin and Lewin, Sharon R. and Søgaard, Ole Schmeltz and Palmer, Sarah},
	title = {Broad activation of latent {HIV}-1 in vivo},
	journaltitle = {Nature Communications},
	shortjournal = {Nat Commun},
	volume = {7},
	pages = {12731},
	date = {2016-09-08},
	issn = {2041-1723},
	doi = {10.1038/ncomms12731},
	pmid = {27605062},
	pmcid = {PMC5025526},
	keywords = {Adult, Anti-{HIV} Agents, {CD}4-Positive T-Lymphocytes, {DNA}, Viral, Drug Administration Schedule, {HIV} Infections, {HIV}-1, Humans, Hydroxamic Acids, Indoles, Middle Aged, Panobinostat, {RNA}, Viral, Viremia, Virus Latency, Vorinostat},
	abstract = {The 'shock and kill' approach to cure human immunodeficiency virus ({HIV}) includes transcriptional induction of latent {HIV}-1 proviruses using latency-reversing agents ({LRAs}) with targeted immunotherapy to purge infected cells. The administration of {LRAs} (panobinostat or vorinostat) to {HIV}-1-infected individuals on antiretroviral therapy induces a significant increase in cell-associated unspliced ({CA}-{US}) {HIV}-1 {RNA} from {CD}4(+) T cells. However, it is important to discern whether the increases in {CA}-{US} {HIV}-1 {RNA} are due to limited or broad activation of {HIV}-1 proviruses. Here we use single-genome sequencing to find that the {RNA} transcripts observed following {LRA} administration are genetically diverse, indicating activation of transcription from an extensive range of proviruses. Defective sequences are more frequently found in {CA} {HIV}-1 {RNA} than in {HIV}-1 {DNA}, which has implications for developing an accurate measure of {HIV}-1 reservoir size. Our findings provide insights into the effects of panobinostat and vorinostat as {LRAs} for latent {HIV}-1.}
}

% Gentleman et al. (2004): the Bioconductor project paper, Genome Biology 5(10).
@article{gentleman_bioconductor_2004,
	author = {Gentleman, Robert C and Carey, Vincent J and Bates, Douglas M and Bolstad, Ben and Dettling, Marcel and Dudoit, Sandrine and Ellis, Byron and Gautier, Laurent and Ge, Yongchao and Gentry, Jeff and Hornik, Kurt and Hothorn, Torsten and Huber, Wolfgang and Iacus, Stefano and Irizarry, Rafael and Leisch, Friedrich and Li, Cheng and Maechler, Martin and Rossini, Anthony J and Sawitzki, Gunther and Smith, Colin and Smyth, Gordon and Tierney, Luke and Yang, Jean Y H and Zhang, Jianhua},
	title = {Bioconductor: open software development for computational biology and bioinformatics},
	shorttitle = {Bioconductor},
	journal = {Genome Biology},
	volume = {5},
	number = {10},
	pages = {R80},
	year = {2004},
	issn = {1465-6914},
	doi = {10.1186/gb-2004-5-10-r80},
	url = {http://www.ncbi.nlm.nih.gov/pubmed/15461798},
	urldate = {2010-01-29},
	pmid = {15461798},
	keywords = {Computational Biology, Internet, Reproducibility of Results, Software},
	abstract = {The Bioconductor project is an initiative for the collaborative creation of extensible software for computational biology and bioinformatics. The goals of the project include: fostering collaborative development and widespread use of innovative software, reducing barriers to entry into interdisciplinary scientific research, and promoting the achievement of remote reproducibility of research results. We describe details of our aims and methods, identify current challenges, compare Bioconductor to other open bioinformatics projects, and provide working examples.}
}

% Stamatakis (2014): RAxML version 8, Bioinformatics 30(9).
% Abstract sections are separated by line breaks, one heading per line.
@article{stamatakis_raxml_2014,
	title = {{RAxML} version 8: a tool for phylogenetic analysis and post-analysis of large phylogenies},
	volume = {30},
	issn = {1367-4803},
	shorttitle = {{RAxML} version 8},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3998144/},
	doi = {10.1093/bioinformatics/btu033},
	abstract = {Motivation: Phylogenies are increasingly used in all fields of medical and biological research. Moreover, because of the next-generation sequencing revolution, datasets used for conducting phylogenetic analyses grow at an unprecedented pace. RAxML (Randomized Axelerated Maximum Likelihood) is a popular program for phylogenetic analyses of large datasets under maximum likelihood. Since the last RAxML paper in 2006, it has been continuously maintained and extended to accommodate the increasingly growing input datasets and to serve the needs of the user community.
Results: I present some of the most notable new features and extensions of RAxML, such as a substantial extension of substitution models and supported data types, the introduction of SSE3, AVX and AVX2 vector intrinsics, techniques for reducing the memory requirements of the code and a plethora of operations for conducting post-analyses on sets of trees. In addition, an up-to-date 50-page user manual covering all new RAxML options is available.
Availability and implementation: The code is available under GNU GPL at https://github.com/stamatak/standard-RAxML.
Contact: alexandros.stamatakis@h-its.org
Supplementary information: Supplementary data are available at Bioinformatics online.},
	number = {9},
	urldate = {2016-11-09},
	journal = {Bioinformatics},
	author = {Stamatakis, Alexandros},
	month = may,
	year = {2014},
	pmid = {24451623},
	pmcid = {PMC3998144},
	pages = {1312--1313}
}

% Stamatakis, Ludwig and Meier (2005): RAxML-III, Bioinformatics 21(4).
% Journal name is spelled the same way as the other Bioinformatics entries in this file.
% NOTE(review): the URL inside the abstract looks truncated (host ends in ".tum"); verify
% against the publisher record before relying on it.
@article{stamatakis_raxml-iii_2005,
	title = {{RAxML}-{III}: a fast program for maximum likelihood-based inference of large phylogenetic trees},
	volume = {21},
	issn = {1367-4803},
	shorttitle = {{RAxML}-{III}},
	doi = {10.1093/bioinformatics/bti191},
	abstract = {MOTIVATION: The computation of large phylogenetic trees with statistical models such as maximum likelihood or bayesian inference is computationally extremely intensive. It has repeatedly been demonstrated that these models are able to recover the true tree or a tree which is topologically closer to the true tree more frequently than less elaborate methods such as parsimony or neighbor joining. Due to the combinatorial and computational complexity the size of trees which can be computed on a Biologist's PC workstation within reasonable time is limited to trees containing approximately 100 taxa.
RESULTS: In this paper we present the latest release of our program RAxML-III for rapid maximum likelihood-based inference of large evolutionary trees which allows for computation of 1.000-taxon trees in less than 24 hours on a single PC processor. We compare RAxML-III to the currently fastest implementations for maximum likelihood and bayesian inference: PHYML and MrBayes. Whereas RAxML-III performs worse than PHYML and MrBayes on synthetic data it clearly outperforms both programs on all real data alignments used in terms of speed and final likelihood values.
AVAILABILITY AND SUPPLEMENTARY INFORMATION: RAxML-III including all alignments and final trees mentioned in this paper is freely available as open source code at http://wwwbode.cs.tum/{\textasciitilde}stamatak
CONTACT: stamatak@cs.tum.edu.},
	language = {eng},
	number = {4},
	journal = {Bioinformatics},
	author = {Stamatakis, A. and Ludwig, T. and Meier, H.},
	month = feb,
	year = {2005},
	pmid = {15608047},
	keywords = {Algorithms, Artificial Intelligence, Biological Evolution, Computer Simulation, Computing Methodologies, Genetics, Population, Likelihood Functions, Models, Genetic, Models, Statistical, Phylogeny, Software},
	pages = {456--463},
	file = {Bti191.pdf:/Volumes/HOME/Zotero/storage/3RHZ75PN/Bti191.pdf:application/pdf}
}

% Hoehna et al. (2014): probabilistic graphical models in phylogenetics, Systematic Biology 63(5).
% Uses biblatex field names; the publication date is carried by the date field alone.
@article{hohna_probabilistic_2014,
	title = {Probabilistic Graphical Model Representation in Phylogenetics},
	volume = {63},
	issn = {1063-5157, 1076-836X},
	url = {http://sysbio.oxfordjournals.org/content/63/5/753},
	doi = {10.1093/sysbio/syu039},
	abstract = {Recent years have seen a rapid expansion of the model space explored in statistical phylogenetics, emphasizing the need for new approaches to statistical model representation and software development. Clear communication and representation of the chosen model is crucial for: (i) reproducibility of an analysis, (ii) model development, and (iii) software design. Moreover, a unified, clear and understandable framework for model representation lowers the barrier for beginners and nonspecialists to grasp complex phylogenetic models, including their assumptions and parameter/variable dependencies. Graphical modeling is a unifying framework that has gained in popularity in the statistical literature in recent years. The core idea is to break complex models into conditionally independent distributions. The strength lies in the comprehensibility, flexibility, and adaptability of this formalism, and the large body of computational work based on it. Graphical models are well-suited to teach statistical models, to facilitate communication among phylogeneticists and in the development of generic software for simulation and statistical inference. Here, we provide an introduction to graphical models for phylogeneticists and extend the standard graphical model representation to the realm of phylogenetics. We introduce a new graphical model component, tree plates, to capture the changing structure of the subgraph corresponding to a phylogenetic tree. We describe a range of phylogenetic models using the graphical model framework and introduce modules to simplify the representation of standard components in large and complex models. Phylogenetic model graphs can be readily used in simulation, maximum likelihood inference, and Bayesian inference using, for example, Metropolis–Hastings or Gibbs sampling of the posterior distribution. [Computation; graphical models; inference; modularization; statistical phylogenetics; tree plate.]},
	pages = {753--771},
	number = {5},
	journaltitle = {Systematic Biology},
	shortjournal = {Syst Biol},
	author = {Höhna, Sebastian and Heath, Tracy A. and Boussau, Bastien and Landis, Michael J. and Ronquist, Fredrik and Huelsenbeck, John P.},
	urldate = {2015-11-17},
	date = {2014-09-01},
	langid = {english},
	pmid = {24951559}
}

% Spielman and Wilke (2015): dN/dS versus scaled selection coefficients, Molecular Biology and Evolution 32(4).
@article{spielman_relationship_2015,
	author = {Spielman, Stephanie J. and Wilke, Claus O.},
	title = {The {Relationship} between {dN}/{dS} and {Scaled} {Selection} {Coefficients}},
	journal = {Molecular Biology and Evolution},
	volume = {32},
	number = {4},
	pages = {1097--1108},
	year = {2015},
	month = apr,
	issn = {0737-4038},
	doi = {10.1093/molbev/msv003},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4379412/},
	urldate = {2017-04-28},
	pmid = {25576365},
	pmcid = {PMC4379412},
	abstract = {Numerous computational methods exist to assess the mode and strength of natural selection in protein-coding sequences, yet how distinct methods relate to one another remains largely unknown. Here, we elucidate the relationship between two widely used phylogenetic modeling frameworks: dN/dS models and mutation-selection (MutSel) models. We derive a mathematical relationship between dN/dS and scaled selection coefficients, the focal parameters of MutSel models, and use this relationship to gain deeper insight into the behaviors, limitations, and applicabilities of these two modeling frameworks. We prove that, if all synonymous changes are neutral, standard MutSel models correspond to dN/dS≤1. However, if synonymous codons differ in fitness, dN/dS can take on arbitrarily high values even if all selection is purifying. Thus, the MutSel modeling framework cannot necessarily accommodate positive, diversifying selection, while dN/dS cannot distinguish between purifying selection on synonymous codons and positive selection on amino acids. We further propose a new benchmarking strategy of dN/dS inferences against MutSel simulations and demonstrate that the widely used Goldman–Yang-style dN/dS models yield substantially biased dN/dS estimates on realistic sequence data. In contrast, the less frequently used Muse–Gaut-style models display much less bias. Strikingly, the least-biased and most precise dN/dS estimates are never found in the models with the best fit to the data, measured through both AIC and BIC scores. Thus, selecting models based on goodness-of-fit criteria can yield poor parameter estimates if the models considered do not precisely correspond to the underlying mechanism that generated the data. In conclusion, establishing mathematical links among modeling frameworks represents a novel, powerful strategy to pinpoint previously unrecognized model limitations and strengths.},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/9UFKPTEN/Spielman and Wilke - 2015 - The Relationship between dNdS and Scaled Selectio.pdf:application/pdf}
}

% Gire et al. (2014): genomic surveillance of the 2014 Ebola virus outbreak, Science 345(6202).
@article{gire_genomic_2014,
	author = {Gire, Stephen K. and Goba, Augustine and Andersen, Kristian G. and Sealfon, Rachel S. G. and Park, Daniel J. and Kanneh, Lansana and Jalloh, Simbirie and Momoh, Mambu and Fullah, Mohamed and Dudas, Gytis and Wohl, Shirlee and Moses, Lina M. and Yozwiak, Nathan L. and Winnicki, Sarah and Matranga, Christian B. and Malboeuf, Christine M. and Qu, James and Gladden, Adrianne D. and Schaffner, Stephen F. and Yang, Xiao and Jiang, Pan-Pan and Nekoui, Mahan and Colubri, Andres and Coomber, Moinya Ruth and Fonnie, Mbalu and Moigboi, Alex and Gbakie, Michael and Kamara, Fatima K. and Tucker, Veronica and Konuwa, Edwin and Saffa, Sidiki and Sellu, Josephine and Jalloh, Abdul Azziz and Kovoma, Alice and Koninga, James and Mustapha, Ibrahim and Kargbo, Kandeh and Foday, Momoh and Yillah, Mohamed and Kanneh, Franklyn and Robert, Willie and Massally, James L. B. and Chapman, Sinéad B. and Bochicchio, James and Murphy, Cheryl and Nusbaum, Chad and Young, Sarah and Birren, Bruce W. and Grant, Donald S. and Scheiffelin, John S. and Lander, Eric S. and Happi, Christian and Gevao, Sahr M. and Gnirke, Andreas and Rambaut, Andrew and Garry, Robert F. and Khan, S. Humarr and Sabeti, Pardis C.},
	title = {Genomic surveillance elucidates {Ebola} virus origin and transmission during the 2014 outbreak},
	journal = {Science (New York, N.Y.)},
	volume = {345},
	number = {6202},
	pages = {1369--1372},
	year = {2014},
	month = sep,
	issn = {1095-9203},
	doi = {10.1126/science.1259657},
	pmid = {25214632},
	pmcid = {PMC4431643},
	language = {eng},
	keywords = {Base Sequence, Disease Outbreaks, Ebolavirus, Epidemiological Monitoring, Genetic Variation, Genome, Viral, Genomics, Hemorrhagic Fever, Ebola, Humans, Mutation, Sequence Analysis, DNA, Sierra Leone},
	abstract = {In its largest outbreak, Ebola virus disease is spreading through Guinea, Liberia, Sierra Leone, and Nigeria. We sequenced 99 Ebola virus genomes from 78 patients in Sierra Leone to {\textasciitilde}2000× coverage. We observed a rapid accumulation of interhost and intrahost genetic variation, allowing us to characterize patterns of viral transmission over the initial weeks of the epidemic. This West African variant likely diverged from central African lineages around 2004, crossed from Guinea to Sierra Leone in May 2014, and has exhibited sustained human-to-human transmission subsequently, with no evidence of additional zoonotic sources. Because many of the mutations alter protein sequences and other biologically meaningful targets, they should be monitored for impact on diagnostics, vaccines, and therapies critical to outbreak response.}
}

% Smith et al. (2009): dating the emergence of pandemic influenza viruses, PNAS 106(28).
@article{smith_dating_2009,
	author = {Smith, Gavin J. D. and Bahl, Justin and Vijaykrishna, Dhanasekaran and Zhang, Jinxia and Poon, Leo L. M. and Chen, Honglin and Webster, Robert G. and Peiris, J. S. Malik and Guan, Yi},
	title = {Dating the emergence of pandemic influenza viruses},
	journal = {Proceedings of the National Academy of Sciences of the United States of America},
	volume = {106},
	number = {28},
	pages = {11709--11712},
	year = {2009},
	month = jul,
	issn = {1091-6490},
	doi = {10.1073/pnas.0904991106},
	pmid = {19597152},
	pmcid = {PMC2709671},
	language = {eng},
	keywords = {Bayes Theorem, Cluster Analysis, Disease Outbreaks, Evolution, Molecular, History, 20th Century, Humans, Influenza A virus, Influenza, Human, Models, Genetic, Phylogeny},
	abstract = {Pandemic influenza viruses cause significant mortality in humans. In the 20th century, 3 influenza viruses caused major pandemics: the 1918 H1N1 virus, the 1957 H2N2 virus, and the 1968 H3N2 virus. These pandemics were initiated by the introduction and successful adaptation of a novel hemagglutinin subtype to humans from an animal source, resulting in antigenic shift. Despite global concern regarding a new pandemic influenza, the emergence pathway of pandemic strains remains unknown. Here we estimated the evolutionary history and inferred date of introduction to humans of each of the genes for all 20th century pandemic influenza strains. Our results indicate that genetic components of the 1918 H1N1 pandemic virus circulated in mammalian hosts, i.e., swine and humans, as early as 1911 and was not likely to be a recently introduced avian virus. Phylogenetic relationships suggest that the A/Brevig Mission/1/1918 virus (BM/1918) was generated by reassortment between mammalian viruses and a previously circulating human strain, either in swine or, possibly, in humans. Furthermore, seasonal and classic swine H1N1 viruses were not derived directly from BM/1918, but their precursors co-circulated during the pandemic. Mean estimates of the time of most recent common ancestor also suggest that the H2N2 and H3N2 pandemic strains may have been generated through reassortment events in unknown mammalian hosts and involved multiple avian viruses preceding pandemic recognition. The possible generation of pandemic strains through a series of reassortment events in mammals over a period of years before pandemic recognition suggests that appropriate surveillance strategies for detection of precursor viruses may abort future pandemics.}
}

% Hipsley et al. (2009): Bayesian molecular clock dating of lacertid lizards, BMC Evolutionary Biology 9.
@article{hipsley_integration_2009,
	author = {Hipsley, Christy A. and Himmelmann, Lin and Metzler, Dirk and Müller, Johannes},
	title = {Integration of {Bayesian} molecular clock methods and fossil-based soft bounds reveals early {Cenozoic} origin of {African} lacertid lizards},
	journal = {BMC Evolutionary Biology},
	volume = {9},
	pages = {151},
	year = {2009},
	issn = {1471-2148},
	doi = {10.1186/1471-2148-9-151},
	url = {http://dx.doi.org/10.1186/1471-2148-9-151},
	urldate = {2017-04-28},
	abstract = {Although current molecular clock methods offer greater flexibility in modelling evolutionary events, calibration of the clock with dates from the fossil record is still problematic for many groups. Here we implement several new approaches in molecular dating to estimate the evolutionary ages of Lacertidae, an Old World family of lizards with a poor fossil record and uncertain phylogeny. Four different models of rate variation are tested in a new program for Bayesian phylogenetic analysis called TreeTime, based on a combination of mitochondrial and nuclear gene sequences. We incorporate paleontological uncertainty into divergence estimates by expressing multiple calibration dates as a range of probabilistic distributions. We also test the reliability of our proposed calibrations by exploring effects of individual priors on posterior estimates.},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/2EHJ4RFR/Hipsley et al. - 2009 - Integration of Bayesian molecular clock methods an.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/UHGAGKWT/1471-2148-9-151.html:text/html}
}

% Wilkinson et al. (2011): dating primate divergences from fossil and molecular data, Systematic Biology 60(1).
@article{wilkinson_dating_2011,
	author = {Wilkinson, Richard D. and Steiper, Michael E. and Soligo, Christophe and Martin, Robert D. and Yang, Ziheng and Tavaré, Simon},
	title = {Dating {Primate} {Divergences} through an {Integrated} {Analysis} of {Palaeontological} and {Molecular} {Data}},
	journal = {Systematic Biology},
	volume = {60},
	number = {1},
	pages = {16--31},
	year = {2011},
	month = jan,
	issn = {1063-5157},
	doi = {10.1093/sysbio/syq054},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2997628/},
	urldate = {2017-04-28},
	pmid = {21051775},
	pmcid = {PMC2997628},
	abstract = {Estimation of divergence times is usually done using either the fossil record or sequence data from modern species. We provide an integrated analysis of palaeontological and molecular data to give estimates of primate divergence times that utilize both sources of information. The number of preserved primate species discovered in the fossil record, along with their geological age distribution, is combined with the number of extant primate species to provide initial estimates of the primate and anthropoid divergence times. This is done by using a stochastic forwards-modeling approach where speciation and fossil preservation and discovery are simulated forward in time. We use the posterior distribution from the fossil analysis as a prior distribution on node ages in a molecular analysis. Sequence data from two genomic regions (CFTR on human chromosome 7 and the CYP7A1 region on chromosome 8) from 15 primate species are used with the birth–death model implemented in mcmctree in PAML to infer the posterior distribution of the ages of 14 nodes in the primate tree. We find that these age estimates are older than previously reported dates for all but one of these nodes. To perform the inference, a new approximate Bayesian computation (ABC) algorithm is introduced, where the structure of the model can be exploited in an ABC-within-Gibbs algorithm to provide a more efficient analysis.},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/UV4ZDN54/Wilkinson et al. - 2011 - Dating Primate Divergences through an Integrated A.pdf:application/pdf}
}

% Huelsenbeck and Ronquist (2001): MRBAYES, Bioinformatics 17(8).
% Journal name is spelled the same way as the other Bioinformatics entries in this file.
% NOTE(review): the doi was added during cleanup; confirm against the publisher record.
@article{huelsenbeck_mrbayes_2001,
	title = {{MRBAYES}: {Bayesian} inference of phylogenetic trees},
	volume = {17},
	issn = {1367-4803},
	shorttitle = {{MRBAYES}},
	doi = {10.1093/bioinformatics/17.8.754},
	abstract = {SUMMARY: The program MRBAYES performs Bayesian inference of phylogeny using a variant of Markov chain Monte Carlo.
AVAILABILITY: MRBAYES, including the source code, documentation, sample data files, and an executable, is available at http://brahms.biology.rochester.edu/software.html.},
	language = {eng},
	number = {8},
	journal = {Bioinformatics},
	author = {Huelsenbeck, J. P. and Ronquist, F.},
	month = aug,
	year = {2001},
	pmid = {11524383},
	keywords = {Algorithms, Bayes Theorem, Computational Biology, Markov Chains, Phylogeny, Software},
	pages = {754--755}
}

% Web page for NHX by Christian M. Zmasek (accessed 2016-11-11).
@misc{nhx,
	author = {Zmasek, Christian M.},
	title = {{NHX}},
	year = {2016},
	url = {https://sites.google.com/site/cmzmasek/home/software/forester/nhx},
	urldate = {2016-11-11}
}

% Web page for Archaeopteryx by Christian M. Zmasek (accessed 2016-11-11).
@misc{archaeopteryx,
	author = {Zmasek, Christian M.},
	title = {Archaeopteryx},
	year = {2016},
	url = {https://sites.google.com/site/cmzmasek/home/software/archaeopteryx},
	urldate = {2016-11-11}
}


@article{matsen_format_2012,
	title = {A {Format} for {Phylogenetic} {Placements}},
	volume = {7},
	issn = {1932-6203},
	url = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009},
	doi = {10.1371/journal.pone.0031009},
	abstract = {We have developed a unified format for phylogenetic placements, that is, mappings of environmental sequence data (e.g., short reads) into a phylogenetic tree. We are motivated to do so by the growing number of tools for computing and post-processing phylogenetic placements, and the lack of an established standard for storing them. The format is lightweight, versatile, extensible, and is based on the JSON format, which can be parsed by most modern programming languages. Our format is already implemented in several tools for computing and post-processing parsimony- and likelihood-based phylogenetic placements and has worked well in practice. We believe that establishing a standard format for analyzing read placements at this early stage will lead to a more efficient development of powerful and portable post-analysis tools for the growing applications of phylogenetic placement.},
	number = {2},
	urldate = {2016-11-10},
	journal = {PLOS ONE},
	author = {Matsen, Frederick A. and Hoffman, Noah G. and Gallagher, Aaron and Stamatakis, Alexandros},
	month = feb,
	year = {2012},
	keywords = {Algorithms, Multiple alignment calculation, phylogenetic analysis, phylogenetics, Programming Languages, Sequence Alignment, Sequence Analysis, Sequence databases},
	pages = {e31009},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/AV374V4U/Matsen et al. - 2012 - A Format for Phylogenetic Placements.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/P74CXNGV/article.html:text/html}
}

@article{matsen_pplacer_2010,
	title = {pplacer: linear time maximum-likelihood and {Bayesian} phylogenetic placement of sequences onto a fixed reference tree},
	volume = {11},
	issn = {1471-2105},
	shorttitle = {pplacer},
	url = {http://www.biomedcentral.com/1471-2105/11/538},
	doi = {10.1186/1471-2105-11-538},
	language = {en},
	number = {1},
	urldate = {2015-01-05},
	journal = {{BMC} Bioinformatics},
	author = {Matsen, Frederick A. and Kodner, Robin B. and Armbrust, E. Virginia},
	year = {2010},
	pages = {538}
}

@article{zmasek_atv:_2001,
	title = {{ATV}: display and manipulation of annotated phylogenetic trees},
	volume = {17},
	issn = {1367-4803, 1460-2059},
	shorttitle = {{ATV}},
	url = {http://bioinformatics.oxfordjournals.org/content/17/4/383},
	doi = {10.1093/bioinformatics/17.4.383},
	abstract = {Summary: A Tree Viewer (ATV) is a Java tool for the display and manipulation of annotated phylogenetic trees. It can be utilized both as a standalone application and as an applet in a web browser.
Availability: ATV is available via WWW at http://www.genetics.wustl.edu/eddy/atv/ and via FTP at ftp://ftp.genetics.wustl.edu/pub/eddy/software/forester.tar.Z
Contact: eddy@genetics.wustl.edu},
	language = {en},
	number = {4},
	urldate = {2016-11-10},
	journal = {Bioinformatics},
	author = {Zmasek, Christian M. and Eddy, Sean R.},
	month = apr,
	year = {2001},
	pmid = {11301314},
	pages = {383--384}
}

@article{yoder_estimation_2000,
	title = {Estimation of primate speciation dates using local molecular clocks},
	volume = {17},
	issn = {0737-4038},
	doi = {10.1093/oxfordjournals.molbev.a026389},
	abstract = {Protein-coding genes of the mitochondrial genomes from 31 mammalian species were analyzed to estimate the speciation dates within primates and also between rats and mice. Three calibration points were used based on paleontological data: one at 20-25 MYA for the hominoid/cercopithecoid divergence, one at 53-57 MYA for the cetacean/artiodactyl divergence, and the third at 110-130 MYA for the metatherian/eutherian divergence. Both the nucleotide and the amino acid sequences were analyzed, producing conflicting results. The global molecular clock was clearly violated for both the nucleotide and the amino acid data. Models of local clocks were implemented using maximum likelihood, allowing different evolutionary rates for some lineages while assuming rate constancy in others. Surprisingly, the highly divergent third codon positions appeared to contain phylogenetic information and produced more sensible estimates of primate divergence dates than did the amino acid sequences. Estimated dates varied considerably depending on the data type, the calibration point, and the substitution model but differed little among the four tree topologies used. We conclude that the calibration derived from the primate fossil record is too recent to be reliable; we also point out a number of problems in date estimation when the molecular clock does not hold. Despite these obstacles, we derived estimates of primate divergence dates that were well supported by the data and were generally consistent with the paleontological record. Estimation of the mouse-rat divergence date, however, was problematic.},
	language = {eng},
	number = {7},
	journal = {Molecular Biology and Evolution},
	author = {Yoder, Anne D. and Yang, Ziheng},
	month = jul,
	year = {2000},
	pmid = {10889221},
	keywords = {Animals, Codon, Evolution, Molecular, Humans, Mice, Phylogeny, Primates, Rats, Species Specificity},
	pages = {1081--1090}
}

@article{yang_paml_2007,
	author = {Yang, Ziheng},
	title = {{PAML} 4: {Phylogenetic} {Analysis} by {Maximum} {Likelihood}},
	shorttitle = {{PAML} 4},
	journal = {Molecular Biology and Evolution},
	year = {2007},
	month = aug,
	volume = {24},
	number = {8},
	pages = {1586--1591},
	issn = {0737-4038, 1537-1719},
	doi = {10.1093/molbev/msm088},
	pmid = {17483113},
	url = {http://mbe.oxfordjournals.org/content/24/8/1586},
	urldate = {2016-11-09},
	language = {en},
	keywords = {codon models, likelihood, PAML, phylogenetic analysis, Software},
	abstract = {PAML, currently in version 4, is a package of programs for phylogenetic analyses of DNA and protein sequences using maximum likelihood (ML). The programs may be used to compare and test phylogenetic trees, but their main strengths lie in the rich repertoire of evolutionary models implemented, which can be used to estimate parameters in models of sequence evolution and to test interesting biological hypotheses. Uses of the programs include estimation of synonymous and nonsynonymous rates (dN and dS) between two protein-coding DNA sequences, inference of positive Darwinian selection through phylogenetic comparison of protein-coding genes, reconstruction of ancestral genes and proteins for molecular restoration studies of extinct life forms, combined analysis of heterogeneous data sets from multiple gene loci, and estimation of species divergence times incorporating uncertainties in fossil calibrations. This note discusses some of the major applications of the package, which includes example data sets to demonstrate their use. The package is written in ANSI C, and runs under Windows, Mac OSX, and UNIX systems. It is available at http://abacus.gene.ucl.ac.uk/software/paml.html.},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/BA92JETJ/Yang - 2007 - PAML 4 Phylogenetic Analysis by Maximum Likelihoo.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/GBVTCSRB/1586.html:text/html}
}


@article{wang_evidence_2002,
	title = {Evidence for strong selective constraint acting on the nucleotide composition of {16S} ribosomal {RNA} genes},
	volume = {30},
	issn = {0305-1048},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC117185/},
	doi = {10.1093/nar/30.11.2501},
	abstract = {Previous studies have shown that the guanine plus cytosine (G+C) content of ribosomal RNAs (rRNAs) is highly correlated with bacterial growth temperatures. This correlation is strongest in the double-stranded stem regions of the rRNA, a fact that can be explained by selection for increased structural stability at high growth temperatures. In this study, we examined the single-stranded regions of 16S rRNAs. We reasoned that, since these regions of the molecule are subject to less structural constraint than the stem regions, their nucleotide content might simply reflect the overall nucleotide content of the genome. Contrary to this expectation, however, we found that all of the single-stranded regions are characterized by very high adenine (A) and relatively low cytosine (C) contents. Moreover, the nucleotide content of these single-stranded regions is surprisingly constant between species, despite dramatic differences in optimal growth temperatures, and despite large differences in the overall genomic G+C content. This provides compelling evidence for strong stabilizing selection acting on 16S rRNA single-stranded regions. We found that selection favors purines (A+G), and especially adenine (A), in the single-stranded regions of these rRNAs.},
	number = {11},
	urldate = {2017-05-25},
	journal = {Nucleic Acids Research},
	author = {Wang, Huai-chun and Hickey, Donal A.},
	month = jun,
	year = {2002},
	pmid = {12034839},
	pmcid = {PMC117185},
	pages = {2501--2507},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/IU3TS49V/Wang and Hickey - 2002 - Evidence for strong selective constraint acting on.pdf:application/pdf}
}

@article{van_hemert_impact_2016,
	author = {van Hemert, Formijn and van der Kuyl, Antoinette C. and Berkhout, Ben},
	title = {Impact of the biased nucleotide composition of viral {RNA} genomes on {RNA} structure and codon usage},
	journal = {Journal of General Virology},
	year = {2016},
	volume = {97},
	number = {10},
	pages = {2608--2619},
	doi = {10.1099/jgv.0.000579},
	url = {http://jgv.microbiologyresearch.org/content/journal/jgv/10.1099/jgv.0.000579},
	urldate = {2017-05-25},
	abstract = {We are interested in the influence of nucleotide composition on the fundamental characteristics of the virus RNA genome. Most RNA viruses have genomes with a distinct nucleotide composition, e.g. ranging from minimally 12.9 \% to maximally 40.3 \% (C- and U-count, respectively, in coronavirus HKU). We present a global analysis of diverse virus types, including plus-strand, minus-strand and double-strand RNA viruses, for the impact of this nucleotide preference on the predicted structure of the RNA genome that is packaged in virion particles and on the codon usage in the viral open reading frames. Several virus-specific features will be described, but also some general conclusions were drawn. Without exception, the virus-specific nucleotide bias was enriched in the unpaired, single-stranded regions of the RNA genome, thus creating an even more striking virus-specific signature. We present a simple mechanism that is based on elementary aspects of RNA structure folding to explain this general trend. In general, the nucleotide bias was the major determinant of the virus-specific codon usages, thus limiting a role for codon selection and translational control. We will discuss molecular and evolutionary scenarios that may be responsible for the diverse nucleotide biases of RNA viruses.},
	file = {Snapshot:/Volumes/HOME/Zotero/storage/KDD9AI7D/jgv.0.html:text/html}
}

@article{mukherjee_genomes_2017,
	title = {Genomes {OnLine} {Database} ({GOLD}) v.6: data updates and feature enhancements},
	volume = {45},
	issn = {0305-1048},
	shorttitle = {Genomes {OnLine} {Database} ({GOLD}) v.6},
	url = {https://academic.oup.com/nar/article/45/D1/D446/2333884/Genomes-OnLine-Database-GOLD-v-6-data-updates-and},
	doi = {10.1093/nar/gkw992},
	number = {D1},
	urldate = {2017-03-15},
	journal = {Nucleic Acids Research},
	author = {Mukherjee, Supratim and Stamatis, Dimitri and Bertsch, Jon and Ovchinnikova, Galina and Verezemska, Olena and Isbandi, Michelle and Thomas, Alex D. and Ali, Rida and Sharma, Kaushal and Kyrpides, Nikos C. and Reddy, T. B. K.},
	month = jan,
	year = {2017},
	pages = {D446--D456},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/64R7NGAT/Mukherjee et al. - 2017 - Genomes OnLine Database (GOLD) v.6 data updates a.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/DFNXDGNV/Genomes-OnLine-Database-GOLD-v-6-data-updates-and.html:text/html}
}


@article{vienne_lifemap:_2016,
	title = {Lifemap: {Exploring} the {Entire} {Tree} of {Life}},
	volume = {14},
	issn = {1545-7885},
	shorttitle = {Lifemap},
	url = {http://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.2001624},
	doi = {10.1371/journal.pbio.2001624},
	abstract = {The Tree of Life (ToL) is meant to be a unique representation of the evolutionary relationships between all species on earth. Huge efforts are made to assemble such a large tree, helped by the decrease of sequencing costs and improved methods to reconstruct and combine phylogenies, but no tool exists today to explore the ToL in its entirety in a satisfying manner. By combining methods used in modern cartography, such as OpenStreetMap, with a new way of representing tree-like structures, I created Lifemap, a tool allowing the exploration of a complete representation of the ToL (between 800,000 and 2.2 million species depending on the data source) in a zoomable interface. A server version of Lifemap also allows users to visualize their own trees. This should help researchers in ecology and evolutionary biology in their everyday work, but may also permit the diffusion to a broader audience of our current knowledge of the evolutionary relationships linking all organisms.},
	number = {12},
	urldate = {2017-05-23},
	journal = {PLOS Biology},
	author = {de Vienne, Damien M.},
	month = dec,
	year = {2016},
	keywords = {Computing methods, evolutionary biology, Genomic databases, Online encyclopedias, phylogenetic analysis, phylogenetics, Plant ecology, Taxonomy},
	pages = {e2001624},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/KZAC3NIA/Vienne - 2016 - Lifemap Exploring the Entire Tree of Life.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/G7WHSU22/authors.html:text/html}
}


@article{allende_treelink:_2015,
	author = {Allende, Christian and Sohn, Erik and Little, Cedric},
	title = {Treelink: data integration, clustering and visualization of phylogenetic trees},
	shorttitle = {Treelink},
	journal = {BMC Bioinformatics},
	year = {2015},
	volume = {16},
	pages = {414},
	issn = {1471-2105},
	doi = {10.1186/s12859-015-0860-1},
	url = {http://dx.doi.org/10.1186/s12859-015-0860-1},
	urldate = {2017-05-23},
	keywords = {Clustering, Data integration, phylogenetic tree, visualization},
	abstract = {Phylogenetic trees are central to a wide range of biological studies. In many of these studies, tree nodes need to be associated with a variety of attributes. For example, in studies concerned with viral relationships, tree nodes are associated with epidemiological information, such as location, age and subtype. Gene trees used in comparative genomics are usually linked with taxonomic information, such as functional annotations and events. A wide variety of tree visualization and annotation tools have been developed in the past, however none of them are intended for an integrative and comparative analysis.},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/WJUSJ9RS/Allende et al. - 2015 - Treelink data integration, clustering and visuali.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/T97B4F73/s12859-015-0860-1.pdf:application/pdf}
}


@article{pawlowski_tuberculosis_2012,
	author = {Pawlowski, Andrzej and Jansson, Marianne and Sköld, Markus and Rottenberg, Martin E. and Källenius, Gunilla},
	title = {Tuberculosis and {HIV} {Co}-{Infection}},
	journal = {PLOS Pathogens},
	year = {2012},
	month = feb,
	volume = {8},
	number = {2},
	pages = {e1002464},
	issn = {1553-7374},
	doi = {10.1371/journal.ppat.1002464},
	url = {http://journals.plos.org/plospathogens/article?id=10.1371/journal.ppat.1002464},
	urldate = {2017-05-23},
	keywords = {HIV, HIV infections, Immune response, Macaque, Mouse models, Mycobacterium tuberculosis, T cells, Tuberculosis},
	abstract = {Tuberculosis (TB) and HIV co-infections place an immense burden on health care systems and pose particular diagnostic and therapeutic challenges. Infection with HIV is the most powerful known risk factor predisposing for Mycobacterium tuberculosis infection and progression to active disease, which increases the risk of latent TB reactivation 20-fold. TB is also the most common cause of AIDS-related death. Thus, M. tuberculosis and HIV act in synergy, accelerating the decline of immunological functions and leading to subsequent death if untreated. The mechanisms behind the breakdown of the immune defense of the co-infected individual are not well known. The aim of this review is to highlight immunological events that may accelerate the development of one of the two diseases in the presence of the co-infecting organism. We also review possible animal models for studies of the interaction of the two pathogens, and describe gaps in knowledge and needs for future studies to develop preventive measures against the two diseases.},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/VNBJEE65/Pawlowski et al. - 2012 - Tuberculosis and HIV Co-Infection.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/9ZIHRXQ2/article.html:text/html}
}


@article{nurk_metaspades:_2017,
	title = {{metaSPAdes}: a new versatile metagenomic assembler},
	volume = {27},
	issn = {1088-9051, 1549-5469},
	shorttitle = {{metaSPAdes}},
	url = {http://genome.cshlp.org/content/early/2017/04/07/gr.213959.116},
	doi = {10.1101/gr.213959.116},
	abstract = {While metagenomics has emerged as a technology of choice for analyzing bacterial populations, the assembly of metagenomic data remains challenging, thus stifling biological discoveries. Moreover, recent studies revealed that complex bacterial populations may be composed from dozens of related strains, thus further amplifying the challenge of metagenomic assembly. metaSPAdes addresses various challenges of metagenomic assembly by capitalizing on computational ideas that proved to be useful in assemblies of single cells and highly polymorphic diploid genomes. We benchmark metaSPAdes against other state-of-the-art metagenome assemblers and demonstrate that it results in high-quality assemblies across diverse data sets.},
	language = {en},
	number = {5},
	urldate = {2017-05-23},
	journal = {Genome Research},
	author = {Nurk, Sergey and Meleshko, Dmitry and Korobeynikov, Anton and Pevzner, Pavel A.},
	month = may,
	year = {2017},
	pmid = {28298430},
	pages = {824--834},
	file = {Snapshot:/Volumes/HOME/Zotero/storage/PRIKCIM4/gr.213959.html:text/html}
}

@article{peng_meta-idba:_2011,
	title = {Meta-{IDBA}: a de {Novo} assembler for metagenomic data},
	volume = {27},
	issn = {1367-4803},
	shorttitle = {Meta-{IDBA}},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3117360/},
	doi = {10.1093/bioinformatics/btr216},
	abstract = {Motivation: Next-generation sequencing techniques allow us to generate reads from a microbial environment in order to analyze the microbial community. However, assembling of a set of mixed reads from different species to form contigs is a bottleneck of metagenomic research. Although there are many assemblers for assembling reads from a single genome, there are no assemblers for assembling reads in metagenomic data without reference genome sequences. Moreover, the performances of these assemblers on metagenomic data are far from satisfactory, because of the existence of common regions in the genomes of subspecies and species, which make the assembly problem much more complicated.
Results: We introduce the Meta-IDBA algorithm for assembling reads in metagenomic data, which contain multiple genomes from different species. There are two core steps in Meta-IDBA. It first tries to partition the de Bruijn graph into isolated components of different species based on an important observation. Then, for each component, it captures the slight variants of the genomes of subspecies from the same species by multiple alignments and represents the genome of one species, using a consensus sequence. Comparison of the performances of Meta-IDBA and existing assemblers, such as Velvet and Abyss for different metagenomic datasets shows that Meta-IDBA can reconstruct longer contigs with similar accuracy.
Availability: Meta-IDBA toolkit is available at our website http://www.cs.hku.hk/{\textasciitilde}alse/metaidba.
Contact: chin@cs.hku.hk},
	number = {13},
	urldate = {2017-05-23},
	journal = {Bioinformatics},
	author = {Peng, Yu and Leung, Henry C. M. and Yiu, S. M. and Chin, Francis Y. L.},
	month = jul,
	year = {2011},
	pmid = {21685107},
	pmcid = {PMC3117360},
	pages = {i94--i101}
}

@article{pond_hyphy:_2005,
	title = {{HyPhy}: hypothesis testing using phylogenies},
	volume = {21},
	issn = {1367-4803},
	shorttitle = {{HyPhy}},
	doi = {10.1093/bioinformatics/bti079},
	abstract = {The HyPhy package is designed to provide a flexible and unified platform for carrying out likelihood-based analyses on multiple alignments of molecular sequence data, with the emphasis on studies of rates and patterns of sequence evolution.
AVAILABILITY: http://www.hyphy.org
CONTACT: muse@stat.ncsu.edu
SUPPLEMENTARY INFORMATION: HyPhy documentation and tutorials are available at http://www.hyphy.org.},
	language = {ENG},
	number = {5},
	journal = {Bioinformatics},
	author = {Kosakovsky Pond, Sergei L. and Frost, Simon D. W. and Muse, Spencer V.},
	month = mar,
	year = {2005},
	pmid = {15509596},
	keywords = {Algorithms, Computer Simulation, Evolution, Molecular, Models, Genetic, Phylogeny, Sequence Alignment, Sequence Analysis, Protein, Software, User-Computer Interface},
	pages = {676--679}
}


@article{bouckaert_beast_2014,
	title = {{BEAST} 2: {A} {Software} {Platform} for {Bayesian} {Evolutionary} {Analysis}},
	volume = {10},
	issn = {1553-7358},
	shorttitle = {{BEAST} 2},
	url = {http://dx.doi.org/10.1371/journal.pcbi.1003537},
	doi = {10.1371/journal.pcbi.1003537},
	abstract = {We present a new open source, extensible and flexible software platform for Bayesian evolutionary analysis called BEAST 2. This software platform is a re-design of the popular BEAST 1 platform to correct structural deficiencies that became evident as the BEAST 1 software evolved. Key among those deficiencies was the lack of post-deployment extensibility. BEAST 2 now has a fully developed package management system that allows third party developers to write additional functionality that can be directly installed to the BEAST 2 analysis platform via a package manager without requiring a new software release of the platform. This package architecture is showcased with a number of recently published new models encompassing birth-death-sampling tree priors, phylodynamics and model averaging for substitution models and site partitioning. A second major improvement is the ability to read/write the entire state of the MCMC chain to/from disk allowing it to be easily shared between multiple instances of the BEAST software. This facilitates checkpointing and better support for multi-processor and high-end computing extensions. Finally, the functionality in new packages can be easily added to the user interface (BEAUti 2) by a simple XML template-based mechanism because BEAST 2 has been re-designed to provide greater integration between the analysis engine and the user interface so that, for example BEAST and BEAUti use exactly the same XML file format.},
	number = {4},
	urldate = {2015-05-27},
	journal = {PLOS Computational Biology},
	author = {Bouckaert, Remco and Heled, Joseph and Kühnert, Denise and Vaughan, Tim and Wu, Chieh-Hsi and Xie, Dong and Suchard, Marc A. and Rambaut, Andrew and Drummond, Alexei J.},
	month = apr,
	year = {2014},
	pages = {e1003537},
	file = {PLoS Full Text PDF:/Volumes/HOME/Zotero/storage/WHECE65J/Bouckaert et al. - 2014 - BEAST 2 A Software Platform for Bayesian Evolutio.pdf:application/pdf}
}

@article{hohna_revbayes:_2016,
	title = {{RevBayes}: {Bayesian} {Phylogenetic} {Inference} {Using} {Graphical} {Models} and an {Interactive} {Model}-{Specification} {Language}},
	volume = {65},
	issn = {1063-5157, 1076-836X},
	shorttitle = {{RevBayes}},
	url = {http://sysbio.oxfordjournals.org/content/65/4/726},
	doi = {10.1093/sysbio/syw021},
	abstract = {Programs for Bayesian inference of phylogeny currently implement a unique and fixed suite of models. Consequently, users of these software packages are simultaneously forced to use a number of programs for a given study, while also lacking the freedom to explore models that have not been implemented by the developers of those programs. We developed a new open-source software package, RevBayes, to address these problems. RevBayes is entirely based on probabilistic graphical models, a powerful generic framework for specifying and analyzing statistical models. Phylogenetic-graphical models can be specified interactively in RevBayes, piece by piece, using a new succinct and intuitive language called Rev. Rev is similar to the R language and the BUGS model-specification language, and should be easy to learn for most users. The strength of RevBayes is the simplicity with which one can design, specify, and implement new and complex models. Fortunately, this tremendous flexibility does not come at the cost of slower computation; as we demonstrate, RevBayes outperforms competing software for several standard analyses. Compared with other programs, RevBayes has fewer black-box elements. Users need to explicitly specify each part of the model and analysis. Although this explicitness may initially be unfamiliar, we are convinced that this transparency will improve understanding of phylogenetic models in our field. Moreover, it will motivate the search for improvements to existing methods by brazenly exposing the model choices that we make to critical scrutiny. RevBayes is freely available at http://www.RevBayes.com. [Bayesian inference; Graphical models; MCMC; statistical phylogenetics.]},
	language = {en},
	number = {4},
	urldate = {2016-11-09},
	journal = {Systematic Biology},
	author = {Höhna, Sebastian and Landis, Michael J. and Heath, Tracy A. and Boussau, Bastien and Lartillot, Nicolas and Moore, Brian R. and Huelsenbeck, John P. and Ronquist, Fredrik},
	month = jul,
	year = {2016},
	pmid = {27235697},
	pages = {726--736},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/XBPPD65E/Höhna et al. - 2016 - RevBayes Bayesian Phylogenetic Inference Using Gr.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/69K393CD/726.html:text/html}
}

@article{sanderson_r8s:_2003,
	title = {r8s: inferring absolute rates of molecular evolution and divergence times in the absence of a molecular clock},
	volume = {19},
	issn = {1367-4803, 1460-2059},
	shorttitle = {r8s},
	url = {http://bioinformatics.oxfordjournals.org/content/19/2/301},
	doi = {10.1093/bioinformatics/19.2.301},
	language = {en},
	number = {2},
	urldate = {2015-09-07},
	journal = {Bioinformatics},
	author = {Sanderson, Michael J.},
	month = jan,
	year = {2003},
	pmid = {12538260},
	pages = {301--302}
}

@article{matsen_pplacer:_2010,
	title = {pplacer: linear time maximum-likelihood and {Bayesian} phylogenetic placement of sequences onto a fixed reference tree},
	volume = {11},
	issn = {1471-2105},
	shorttitle = {pplacer},
	url = {http://dx.doi.org/10.1186/1471-2105-11-538},
	doi = {10.1186/1471-2105-11-538},
	abstract = {Likelihood-based phylogenetic inference is generally considered to be the most reliable classification method for unknown sequences. However, traditional likelihood-based phylogenetic methods cannot be applied to large volumes of short reads from next-generation sequencing due to computational complexity issues and lack of phylogenetic signal. "Phylogenetic placement," where a reference tree is fixed and the unknown query sequences are placed onto the tree via a reference alignment, is a way to bring the inferential power offered by likelihood-based approaches to large data sets.},
	number = {1},
	urldate = {2016-11-09},
	journal = {BMC Bioinformatics},
	author = {Matsen, Frederick A. and Kodner, Robin B. and Armbrust, E. Virginia},
	year = {2010},
	pages = {538},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/9ZUWFKZF/Matsen et al. - 2010 - pplacer linear time maximum-likelihood and Bayesi.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/S56ZJD7H/1471-2105-11-538.html:text/html}
}

@article{boussau_genome-scale_2013,
	author = {Boussau, Bastien and Szöllősi, Gergely J. and Duret, Laurent and Gouy, Manolo and Tannier, Eric and Daubin, Vincent},
	title = {Genome-scale coestimation of species and gene trees},
	journal = {Genome Research},
	year = {2013},
	month = feb,
	volume = {23},
	number = {2},
	pages = {323--330},
	issn = {1088-9051, 1549-5469},
	doi = {10.1101/gr.141978.112},
	pmid = {23132911},
	url = {http://genome.cshlp.org/content/23/2/323},
	urldate = {2015-11-17},
	language = {en},
	abstract = {Comparisons of gene trees and species trees are key to understanding major processes of genome evolution such as gene duplication and loss. Because current methods to reconstruct phylogenies fail to model the two-way dependency between gene trees and the species tree, they often misrepresent gene and species histories. We present a new probabilistic model to jointly infer rooted species and gene trees for dozens of genomes and thousands of gene families. We use simulations to show that this method accurately infers the species tree and gene trees, is robust to misspecification of the models of sequence and gene family evolution, and provides a precise historic record of gene duplications and losses throughout genome evolution. We simultaneously reconstruct the history of mammalian species and their genes based on 36 completely sequenced genomes, and use the reconstructed gene trees to infer the gene content and organization of ancestral mammalian genomes. We show that our method yields a more accurate picture of ancestral genomes than the trees available in the authoritative database Ensembl.},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/BTM73AG4/Boussau et al. - 2013 - Genome-scale coestimation of species and gene tree.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/KF3UWMH5/323.html:text/html}
}

@article{yu_chipseeker:_2015,
	title = {{ChIPseeker}: an {R}/{Bioconductor} package for {ChIP} peak annotation, comparison and visualization},
	volume = {31},
	issn = {1367-4803, 1460-2059},
	shorttitle = {{ChIPseeker}},
	url = {http://bioinformatics.oxfordjournals.org/content/31/14/2382},
	doi = {10.1093/bioinformatics/btv145},
	abstract = {Summary: ChIPseeker is an R package for annotating ChIP-seq data analysis. It supports annotating ChIP peaks and provides functions to visualize ChIP peaks coverage over chromosomes and profiles of peaks binding to TSS regions. Comparison of ChIP peak profiles and annotation are also supported. Moreover, it supports evaluating significant overlap among ChIP-seq datasets. Currently, ChIPseeker contains 15 000 bed file information from GEO database. These datasets can be downloaded and compare with user’s own data to explore significant overlap datasets for inferring co-regulation or transcription factor complex for further investigation.
Availability and implementation: ChIPseeker is released under Artistic-2.0 License. The source code and documents are freely available through Bioconductor (http://www.bioconductor.org/packages/release/bioc/html/ChIPseeker.html).
Contact: guangchuangyu@gmail.com or tqyhe@jnu.edu.cn
Supplementary information: Supplementary data are available at Bioinformatics online.},
	language = {en},
	number = {14},
	urldate = {2015-07-09},
	journal = {Bioinformatics},
	author = {Yu, Guangchuang and Wang, Li-Gen and He, Qing-Yu},
	month = jul,
	year = {2015},
	pmid = {25765347},
	pages = {2382--2383},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/34T6KRIG/Yu et al. - 2015 - ChIPseeker an RBioconductor package for ChIP pea.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/KCHE9UWN/2382.html:text/html}
}

@article{koelle_epochal_2006,
	title = {Epochal evolution shapes the phylodynamics of interpandemic influenza {A} ({H3N2}) in humans},
	volume = {314},
	issn = {1095-9203},
	doi = {10.1126/science.1132745},
	abstract = {Human influenza A (subtype H3N2) is characterized genetically by the limited standing diversity of its hemagglutinin and antigenically by clusters that emerge and replace each other within 2 to 8 years. By introducing an epidemiological model that allows for differences between the genetic and antigenic properties of the virus's hemagglutinin, we show that these patterns can arise from cluster-specific immunity alone. Central to the formulation is a genotype-to-phenotype mapping, based on neutral networks, with antigenic phenotypes, not genotypes, determining the degree of strain cross-immunity. The model parsimoniously explains well-known, as well as previously unremarked, features of interpandemic influenza dynamics and evolution. It captures the observed boom-and-bust pattern of viral evolution, with periods of antigenic stasis during which genetic diversity grows, and with episodic contraction of this diversity during cluster transitions.},
	language = {en},
	number = {5807},
	journal = {Science},
	author = {Koelle, Katia and Cobey, Sarah and Grenfell, Bryan and Pascual, Mercedes},
	month = dec,
	year = {2006},
	pmid = {17185596},
	keywords = {Amino Acid Substitution, Antigenic Variation, Antigens, Viral, Computer Simulation, Cross Reactions, Disease Outbreaks, Disease Susceptibility, Epitopes, Evolution, Molecular, Genotype, Hemagglutinin Glycoproteins, Influenza Virus, Humans, Immunity, Herd, Influenza A Virus, H3N2 Subtype, Influenza, Human, Models, Biological, Models, Statistical, Phenotype, Phylogeny, Point Mutation, Polymorphism, Genetic},
	pages = {1898--1903}
}

@article{loverdo_inter-generational_2013,
	title = {Inter-Generational Phenotypic Mixing in Viral Evolution},
	volume = {67},
	issn = {0014-3820},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3676872/},
	doi = {10.1111/evo.12048},
	abstract = {Viral particles (virions) are made of genomic material packaged with proteins, drawn from the pool of proteins in the parent cell. It is well known that when virion concentrations are high, cells can be coinfected with multiple viral strains that can complement each other. Viral genomes can then interact with proteins derived from different strains, in a phenomenon known as phenotypic mixing. But phenotypic mixing is actually far more common: viruses mutate very often, and each time a mutation occurs, the parent cell contains different types of viral genomes. Due to phenotypic mixing, changes in viral phenotypes can be shifted by a generation from the mutations that cause them. In the regime of evolutionary invasion and escape, when mutations are crucial for the virus to survive, this timing can have a large influence on the probability of emergence of an adapted strain. Modeling the dynamics of viral evolution in these contexts thus requires attention to the mutational mechanism and the determinants of fitness.},
	number = {6},
	urldate = {2017-05-21},
	journal = {Evolution},
	author = {Loverdo, Claude and Lloyd-Smith, James O.},
	month = jun,
	year = {2013},
	pmid = {23730772},
	pmcid = {PMC3676872},
	pages = {1815--1822},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/FSAJMVAM/Loverdo and Lloyd-Smith - 2013 - INTER-GENERATIONAL PHENOTYPIC MIXING IN VIRAL EVOL.pdf:application/pdf}
}


@article{neher_prediction_2016,
	title = {Prediction, dynamics, and visualization of antigenic phenotypes of seasonal influenza viruses},
	volume = {113},
	issn = {0027-8424, 1091-6490},
	url = {http://www.pnas.org/content/113/12/E1701},
	doi = {10.1073/pnas.1525578113},
	abstract = {Human seasonal influenza viruses evolve rapidly, enabling the virus population to evade immunity and reinfect previously infected individuals. Antigenic properties are largely determined by the surface glycoprotein hemagglutinin (HA), and amino acid substitutions at exposed epitope sites in HA mediate loss of recognition by antibodies. Here, we show that antigenic differences measured through serological assay data are well described by a sum of antigenic changes along the path connecting viruses in a phylogenetic tree. This mapping onto the tree allows prediction of antigenicity from HA sequence data alone. The mapping can further be used to make predictions about the makeup of the future A(H3N2) seasonal influenza virus population, and we compare predictions between models with serological and sequence data. To make timely model output readily available, we developed a web browser-based application that visualizes antigenic data on a continuously updated phylogeny.},
	language = {en},
	number = {12},
	urldate = {2017-05-21},
	journal = {Proceedings of the National Academy of Sciences of the United States of America},
	author = {Neher, Richard A. and Bedford, Trevor and Daniels, Rodney S. and Russell, Colin A. and Shraiman, Boris I.},
	month = mar,
	year = {2016},
	pmid = {26951657},
	keywords = {antigenic distance, Evolution, phylogenetic tree},
	pages = {E1701--E1709},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/DS2U2FS5/Neher et al. - 2016 - Prediction, dynamics, and visualization of antigen.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/HPRS579S/E1701.html:text/html}
}

@article{lam_dissemination_2015,
	title = {Dissemination, divergence and establishment of {H7N9} influenza viruses in {China}},
	volume = {522},
	copyright = {© 2015 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
	issn = {0028-0836},
	url = {http://www.nature.com/nature/journal/v522/n7554/abs/nature14348.html},
	doi = {10.1038/nature14348},
	abstract = {Since 2013 the occurrence of human infections by a novel avian H7N9 influenza virus in China has demonstrated the continuing threat posed by zoonotic pathogens. Although the first outbreak wave that was centred on eastern China was seemingly averted, human infections recurred in October 2013 (refs 3, 4, 5, 6, 7). It is unclear how the H7N9 virus re-emerged and how it will develop further; potentially it may become a long-term threat to public health. Here we show that H7N9 viruses have spread from eastern to southern China and become persistent in chickens, which has led to the establishment of multiple regionally distinct lineages with different reassortant genotypes. Repeated introductions of viruses from Zhejiang to other provinces and the presence of H7N9 viruses at live poultry markets have fuelled the recurrence of human infections. This rapid expansion of the geographical distribution and genetic diversity of the H7N9 viruses poses a direct challenge to current disease control systems. Our results also suggest that H7N9 viruses have become enzootic in China and may spread beyond the region, following the pattern previously observed with H5N1 and H9N2 influenza viruses.},
	language = {en},
	number = {7554},
	urldate = {2017-05-19},
	journal = {Nature},
	author = {Lam, Tommy Tsan-Yuk and Zhou, Boping and Wang, Jia and Chai, Yujuan and Shen, Yongyi and Chen, Xinchun and Ma, Chi and Hong, Wenshan and Chen, Yin and Zhang, Yanjun and Duan, Lian and Chen, Peiwen and Jiang, Junfei and Zhang, Yu and Li, Lifeng and Poon, Leo Lit Man and Webby, Richard J. and Smith, David K. and Leung, Gabriel M. and Peiris, Joseph S. M. and Holmes, Edward C. and Guan, Yi and Zhu, Huachen},
	month = jun,
	year = {2015},
	keywords = {influenza virus},
	pages = {102--105},
	file = {Snapshot:/Volumes/HOME/Zotero/storage/CPZTB9H8/nature14348.html:text/html}
}

@article{gupta_using_2015,
	title = {Using the taxon-specific genes for the taxonomic classification of bacterial genomes},
	volume = {16},
	issn = {1471-2164},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4438512/},
	doi = {10.1186/s12864-015-1542-0},
	abstract = {Background
The correct taxonomic assignment of bacterial genomes is a primary and challenging task. With the availability of whole genome sequences, the gene content based approaches appear promising in inferring the bacterial taxonomy. The complete genome sequencing of a bacterial genome often reveals a substantial number of unique genes present only in that genome which can be used for its taxonomic classification.

Results
In this study, we have proposed a comprehensive method which uses the taxon-specific genes for the correct taxonomic assignment of existing and new bacterial genomes. The taxon-specific genes identified at each taxonomic rank have been successfully used for the taxonomic classification of 2,342 genomes present in the NCBI genomes, 36 newly sequenced genomes, and 17 genomes for which the complete taxonomy is not yet known. This approach has been implemented for the development of a tool ‘Microtaxi’ which can be used for the taxonomic assignment of complete bacterial genomes.

Conclusion
The taxon-specific gene based approach provides an alternate valuable methodology to carry out the taxonomic classification of newly sequenced or existing bacterial genomes.

Electronic supplementary material
The online version of this article (doi:10.1186/s12864-015-1542-0) contains supplementary material, which is available to authorized users.},
	number = {1},
	urldate = {2017-05-19},
	journal = {BMC Genomics},
	author = {Gupta, Ankit and Sharma, Vineet K.},
	month = may,
	year = {2015},
	pmid = {25990029},
	pmcid = {PMC4438512},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/2EKDSBRT/Gupta and Sharma - 2015 - Using the taxon-specific genes for the taxonomic c.pdf:application/pdf}
}

@article{bosi_comparative_2016,
	title = {Comparative genome-scale modelling of {Staphylococcus} aureus strains identifies strain-specific metabolic capabilities linked to pathogenicity},
	volume = {113},
	issn = {0027-8424},
	url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4932939/},
	doi = {10.1073/pnas.1523199113},
	abstract = {Comparative analysis of multiple strains within a species is a powerful way to uncover pathoadaptive genetic acquisitions. Hundreds of genome sequences are now available for the human pathogen Staphylococcus aureus, mostly known for its antibiotic-resistant variants that threaten the emergence of panresistant superbugs. In this study, genome-scale models of metabolism are used to analyze the shared and unique metabolic capabilities of this pathogen and its strain-specific variants. The models are used to distinguish S. aureus strains responsible for severe infections based solely on growth capabilities and presence of different virulence factors. The results identify metabolic similarities and differences between S. aureus strains that provide insights into the epidemiology of S. aureus and may help to combat its spread. Staphylococcus aureus is a preeminent bacterial pathogen capable of colonizing diverse ecological niches within its human host. We describe here the pangenome of S. aureus based on analysis of genome sequences from 64 strains of S. aureus spanning a range of ecological niches, host types, and antibiotic resistance profiles. Based on this set, S. aureus is expected to have an open pangenome composed of 7,411 genes and a core genome composed of 1,441 genes. Metabolism was highly conserved in this core genome; however, differences were identified in amino acid and nucleotide biosynthesis pathways between the strains. Genome-scale models (GEMs) of metabolism were constructed for the 64 strains of S. aureus. These GEMs enabled a systems approach to characterizing the core metabolic and panmetabolic capabilities of the S. aureus species. All models were predicted to be auxotrophic for the vitamins niacin (vitamin B3) and thiamin (vitamin B1), whereas strain-specific auxotrophies were predicted for riboflavin (vitamin B2), guanosine, leucine, methionine, and cysteine, among others.
GEMs were used to systematically analyze growth capabilities in more than 300 different growth-supporting environments. The results identified metabolic capabilities linked to pathogenic traits and virulence acquisitions. Such traits can be used to differentiate strains responsible for mild vs. severe infections and preference for hosts (e.g., animals vs. humans). Genome-scale analysis of multiple strains of a species can thus be used to identify metabolic determinants of virulence and increase our understanding of why certain strains of this deadly pathogen have spread rapidly throughout the world.},
	number = {26},
	urldate = {2017-05-19},
	journal = {Proceedings of the National Academy of Sciences of the United States of America},
	author = {Bosi, Emanuele and Monk, Jonathan M. and Aziz, Ramy K. and Fondi, Marco and Nizet, Victor and Palsson, Bernhard Ø.},
	month = jun,
	year = {2016},
	pmid = {27286824},
	pmcid = {PMC4932939},
	pages = {E3801--E3809},
	file = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/V5GBJTVP/Bosi et al. - 2016 - Comparative genome-scale modelling of Staphylococc.pdf:application/pdf}
}

@article{he_emergence_2013,
	title = {Emergence, Circulation, and Spatiotemporal Phylogenetic Analysis of Coxsackievirus {A6}- and Coxsackievirus {A10}-Associated Hand, Foot, and Mouth Disease Infections from 2008 to 2012 in {Shenzhen}, {China}},
	volume = {51},
	issn = {0095-1137, 1098-660X},
	url = {http://jcm.asm.org/content/51/11/3560},
	doi = {10.1128/JCM.01231-13},
	abstract = {Sporadic hand, foot, and mouth disease (HFMD) outbreaks and other infectious diseases in recent years have frequently been associated with certain human enterovirus (HEV) serotypes. This study explored the prevalences and genetic characteristics of non-HEV71 and non-coxsackievirus A16 (CV-A16) human enterovirus-associated HFMD infections in Shenzhen, China. A total of 2,411 clinical stool specimens were collected from hospital-based surveillance for HFMD from 2008 to 2012. The detection of HEV was performed by real-time reverse transcription-PCR (RT-PCR) and RT-seminested PCR, and spatiotemporal phylogenetic analysis was performed based on the VP1 genes. A total of 1,803 (74.8\%) strains comprising 28 different serotypes were detected. In the past 5 years, the predominant serotypes were HEV71 (60.0\%), followed by CV-A16 (21.2\%) and two uncommon serotypes, CV-A6 (13.0\%) and CV-A10 (3.3\%). However, CV-A6 replaced CV-A16 as the second most common serotype between 2010 and 2012. As an emerging pathogen, CV-A6 became as common a causative agent of HFMD as HEV71 in Shenzhen in 2012. Phylogenetic analysis revealed that little variation occurred in the Chinese HEV71 and CV-A16 strains. The genetic characteristics of the Chinese CV-A6 and CV-A10 strains displayed geographic differences. The CV-A6 and CV-A10 strains circulating in Shenzhen likely originated in Europe. It was found that human enteroviruses have a high mutation rate due to evolutionary pressure and frequent recombination ($3.2 \times 10^{-3}$ to $6.4 \times 10^{-3}$ substitutions per site per year for HEV71, CV-A6, CV-A16, and CV-A10). Since certain serotypes are potential threats to the public health, this study provides further insights into the significance of the epidemiological surveillance of HFMD.},
	language = {en},
	number = {11},
	urldate = {2017-05-19},
	journal = {Journal of Clinical Microbiology},
	author = {He, Ya-Qing and Chen, Long and Xu, Wen-Bo and Yang, Hong and Wang, Han-Zhong and Zong, Wen-Ping and Xian, Hui-Xia and Chen, Hui-Ling and Yao, Xiang-Jie and Hu, Zhang-Li and Luo, Min and Zhang, Hai-Long and Ma, Han-Wu and Cheng, Jin-Quan and Feng, Qian-Jin and Zhao, De-Jian},
	month = nov,
	year = {2013},
	pmid = {23966496},
	pages = {3560--3566},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/EXVQQVG7/He et al. - 2013 - Emergence, Circulation, and Spatiotemporal Phyloge.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/HIE48ITK/3560.html:text/html}
}

@article{schon_age_2015,
	title = {Age and origin of {Australian} {Bennelongia} ({Crustacea}, {Ostracoda})},
	volume = {750},
	issn = {0018-8158, 1573-5117},
	url = {https://link.springer.com/article/10.1007/s10750-014-2159-z},
	doi = {10.1007/s10750-014-2159-z},
	abstract = {South-western Australia holds an exceptional number of endemic taxa and has been recognized as a biodiversity hotspot at a global scale. We report a much higher diversity in the genus Bennelongia (Ostracoda) in Western than in eastern Australia. Using mitochondrial COI sequence data for phylogenies, relative age estimates, lineage-through-time plots, and reconstructions of ancestral distributions, we test four hypotheses that might explain the higher diversity and endemicity in Western Australia. (1) We find no evidence for ancient relictualism as most Bennelongia species are probably of Miocene age. (2) There are also no apparent links to vicariant events: speciation has mostly taken place in Western Australia and has been ongoing through the evolutionary history of Bennelongia. (3) Dispersal has apparently not negatively affected Western Australian Bennelongia endemicity although these ostracods produce drought-resistant eggs. We report one case of recent long distance dispersal in B. dedeckkeri with genetically identical populations occurring more than 2,000 km apart. (4) Since speciation has been ongoing, there is no evidence of recent explosive speciation through genetic isolation. The underlying mechanisms of Bennelongia speciation thus remain elusive, although speciation has mostly occurred during a period of increasing aridification of Australia.},
	language = {en},
	number = {1},
	urldate = {2017-05-19},
	journal = {Hydrobiologia},
	author = {Schön, Isa and Shearn, Rylan and Martens, Koen and Koenders, Annette and Halse, Stuart},
	month = may,
	year = {2015},
	pages = {125--146},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/7FU7975K/Schön et al. - 2015 - Age and origin of Australian Bennelongia (Crustace.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/VB4H9GZE/s10750-014-2159-z.html:text/html}
}

@article{maddison_nexus:_1997,
	title = {{Nexus}: An Extensible File Format for Systematic Information},
	volume = {46},
	issn = {1063-5157},
	shorttitle = {Nexus},
	url = {https://academic.oup.com/sysbio/article/46/4/590/1629695/Nexus-An-Extensible-File-Format-for-Systematic},
	doi = {10.1093/sysbio/46.4.590},
	number = {4},
	urldate = {2017-05-19},
	journal = {Systematic Biology},
	author = {Maddison, David R. and Swofford, David L. and Maddison, Wayne P. and Cannatella, David},
	month = dec,
	year = {1997},
	pages = {590--621},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/Z62ACQ4I/Maddison et al. - 1997 - Nexus An Extensible File Format for Systematic In.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/W9H34AN2/Nexus-An-Extensible-File-Format-for-Systematic.html:text/html}
}

@article{felsenstein_phylip_1989,
	title = {{PHYLIP} -- {Phylogeny} {Inference} {Package} ({Version} 3.2)},
	volume = {5},
	journal = {Cladistics},
	author = {Felsenstein, Joseph},
	year = {1989},
	keywords = {bibtex-import, inference, Phylogeny, Software},
	pages = {164--166}
}

@article{retief_phylogenetic_2000,
	title = {Phylogenetic analysis using {PHYLIP}},
	volume = {132},
	issn = {1064-3745},
	language = {en},
	journal = {Methods in Molecular Biology (Clifton, N.J.)},
	author = {Retief, J. D.},
	year = {2000},
	pmid = {10547839},
	keywords = {Amino Acid Sequence, Animals, Base Sequence, Database Management Systems, DNA, Humans, Likelihood Functions, Molecular Sequence Data, Phylogeny, Sequence Alignment, Sequence Homology, Amino Acid},
	pages = {243--258}
}

@article{wilgenbusch_inferring_2003,
	title = {Inferring evolutionary trees with {PAUP}*},
	volume = {Chapter 6},
	issn = {1934-340X},
	doi = {10.1002/0471250953.bi0604s00},
	abstract = {This unit provides a general description of reconstructing evolutionary trees using PAUP* 4.0. The protocol takes users through an example analysis of mitochondrial DNA sequence data using the parsimony and the likelihood criteria to infer optimal trees. The protocol also discusses searching options available in PAUP* and demonstrates how to import non-NEXUS formats. Finally, a general discussion is given regarding the pros and cons of the ``model-free'' and ``model-based'' methods used throughout the protocol.},
	language = {en},
	journal = {Current Protocols in Bioinformatics},
	author = {Wilgenbusch, James C. and Swofford, David L.},
	month = feb,
	year = {2003},
	pmid = {18428704},
	keywords = {Computer Simulation, DNA Mutational Analysis, Evolution, Molecular, Genetic Variation, Models, Genetic, Phylogeny, Sequence Alignment, Sequence Analysis, DNA, Software},
	pages = {Unit 6.4}
}

@article{schmidt_tree-puzzle:_2002,
	title = {{TREE}-{PUZZLE}: maximum likelihood phylogenetic analysis using quartets and parallel computing},
	volume = {18},
	issn = {1367-4803},
	shorttitle = {{TREE}-{PUZZLE}},
	abstract = {SUMMARY: TREE-PUZZLE is a program package for quartet-based maximum-likelihood phylogenetic analysis (formerly PUZZLE, Strimmer and von Haeseler, Mol. Biol. Evol., 13, 964-969, 1996) that provides methods for reconstruction, comparison, and testing of trees and models on DNA as well as protein sequences. To reduce waiting time for larger datasets the tree reconstruction part of the software has been parallelized using message passing that runs on clusters of workstations as well as parallel computers.
AVAILABILITY: http://www.tree-puzzle.de. The program is written in ANSI C. TREE-PUZZLE can be run on UNIX, Windows and Mac systems, including Mac OS X. To run the parallel version of PUZZLE, a Message Passing Interface (MPI) library has to be installed on the system. Free MPI implementations are available on the Web (cf. http://www.lam-mpi.org/mpi/implementations/).},
	language = {en},
	number = {3},
	journal = {Bioinformatics},
	author = {Schmidt, Heiko A. and Strimmer, Korbinian and Vingron, Martin and von Haeseler, Arndt},
	month = mar,
	year = {2002},
	pmid = {11934758},
	keywords = {Algorithms, Computing Methodologies, Database Management Systems, Databases, Genetic, Internet, Likelihood Functions, Mathematical Computing, Molecular Sequence Data, Phylogeny, Sequence Analysis, Software},
	pages = {502--504}
}


@article{lam_genesis_2013,
	title = {The genesis and source of the {H7N9} influenza viruses causing human infections in {China}},
	volume = {502},
	copyright = {© 2013 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
	issn = {0028-0836},
	url = {http://www.nature.com/nature/journal/v502/n7470/full/nature12515.html},
	doi = {10.1038/nature12515},
	abstract = {A novel H7N9 influenza A virus first detected in March 2013 has since caused more than 130 human infections in China, resulting in 40 deaths. Preliminary analyses suggest that the virus is a reassortant of H7, N9 and H9N2 avian influenza viruses, and carries some amino acids associated with mammalian receptor binding, raising concerns of a new pandemic. However, neither the source populations of the H7N9 outbreak lineage nor the conditions for its genesis are fully known. Using a combination of active surveillance, screening of virus archives, and evolutionary analyses, here we show that H7 viruses probably transferred from domestic duck to chicken populations in China on at least two independent occasions. We show that the H7 viruses subsequently reassorted with enzootic H9N2 viruses to generate the H7N9 outbreak lineage, and a related previously unrecognized H7N7 lineage. The H7N9 outbreak lineage has spread over a large geographic region and is prevalent in chickens at live poultry markets, which are thought to be the immediate source of human infections. Whether the H7N9 outbreak lineage has, or will, become enzootic in China and neighbouring regions requires further investigation. The discovery here of a related H7N7 influenza virus in chickens that has the ability to infect mammals experimentally, suggests that H7 viruses may pose threats beyond the current outbreak. The continuing prevalence of H7 viruses in poultry could lead to the generation of highly pathogenic variants and further sporadic human infections, with a continued risk of the virus acquiring human-to-human transmissibility.},
	language = {en},
	number = {7470},
	urldate = {2017-06-07},
	journal = {Nature},
	author = {Lam, Tommy Tsan-Yuk and Wang, Jia and Shen, Yongyi and Zhou, Boping and Duan, Lian and Cheung, Chung-Lam and Ma, Chi and Lycett, Samantha J. and Leung, Connie Yin-Hung and Chen, Xinchun and Li, Lifeng and Hong, Wenshan and Chai, Yujuan and Zhou, Linlin and Liang, Huyi and Ou, Zhihua and Liu, Yongmei and Farooqui, Amber and Kelvin, David J. and Poon, Leo L. M. and Smith, David K. and Pybus, Oliver G. and Leung, Gabriel M. and Shu, Yuelong and Webster, Robert G. and Webby, Richard J. and Peiris, Joseph S. M. and Rambaut, Andrew and Zhu, Huachen and Guan, Yi},
	month = oct,
	year = {2013},
	keywords = {influenza virus, phylogenetics},
	pages = {241--244},
	file = {Snapshot:/Volumes/HOME/Zotero/storage/V93NHZIM/nature12515.html:text/html}
}

@article{goldman_codon-based_1994,
	title = {A codon-based model of nucleotide substitution for protein-coding {DNA} sequences},
	volume = {11},
	issn = {0737-4038},
	abstract = {A codon-based model for the evolution of protein-coding DNA sequences is presented for use in phylogenetic estimation. A Markov process is used to describe substitutions between codons. Transition/transversion rate bias and codon usage bias are allowed in the model, and selective restraints at the protein level are accommodated using physicochemical distances between the amino acids coded for by the codons. Analyses of two data sets suggest that the new codon-based model can provide a better fit to data than can nucleotide-based models and can produce more reliable estimates of certain biologically important measures such as the transition/transversion rate ratio and the synonymous/nonsynonymous substitution rate ratio.},
	language = {en},
	number = {5},
	journal = {Molecular Biology and Evolution},
	author = {Goldman, N. and Yang, Z.},
	month = sep,
	year = {1994},
	pmid = {7968486},
	keywords = {Animals, Chemistry, Physical, Codon, DNA, Genes, Globins, Glucose-1-Phosphate Adenylyltransferase, Likelihood Functions, Mammals, Markov Chains, Models, Genetic, Nucleotidyltransferases, Phylogeny, Physicochemical Phenomena, Point Mutation, Proteins},
	pages = {725--736}
}


@article{liang_expansion_2014,
	title = {Expansion of genotypic diversity and establishment of 2009 {H1N1} pandemic-origin internal genes in pigs in {China}},
	issn = {0022-538X, 1098-5514},
	url = {http://jvi.asm.org/content/early/2014/07/03/JVI.01327-14},
	doi = {10.1128/JVI.01327-14},
	abstract = {‘Two-way’ transmission of influenza viruses between humans and swine has been frequently observed and the occurrence of the 2009 H1N1 pandemic influenza (pdm/09) demonstrated that swine-origin viruses could facilitate the genesis of a pandemic strain. Although multiple introductions to and reassortment in swine of the pdm/09 virus have been repeatedly reported in both Eurasia and the Americas, its long-term impact on the development of swine influenza viruses (SIVs) has not been systematically explored. Our comprehensive evolutionary studies on the complete genomes of 387 SIVs obtained from 2009 to 2012 in influenza surveillance in China revealed 17 reassortant genotypes with pdm/09-origin genes. Even though the entire 2009 pandemic virus and its surface genes cannot persist, its internal genes have becoming established and are now the predominant lineages in pigs in the region. The main persistent pdm/09-origin reassortant forms had at least 5 pdm/09-origin internal genes and their surface genes primarily of European avian-like (EA) or human H3N2-like SIV origin. These findings represent a marked change to the evolutionary patterns and ecosystem of SIVs in China. It is possible that the pdm/09-origin internal genes may be in the process of replacing EA- or triple reassortant-like internal genes. These alterations to the SIV gene pool need to be continually monitored to assess changes in the potential for SIVs to transmit to humans.
Importance Shortly after the emergence of the 2009 pandemic H1N1 (pdm/09) influenza virus, it was transmitted from humans to pigs and this continues to occur around the world. Many reassortants between pdm/09-origin viruses and enzootic swine influenza viruses (SIVs) have been detected. However, the long-term impact of pdm/09-origin viruses on the SIV gene pool, which could lead to the generation of influenza viruses with the potential to infect humans, has not been systematically examined. From extensive surveillance of SIVs over a 38-month period in southern China, it was found that, although neither complete pdm/09 viruses nor their surface genes could persist in pigs, their internal genes did persist. Over the survey period, these internal genes became predominant, potentially replacing those of the enzootic SIV lineages. The altered diversity of the SIV gene pool needs to be closely monitored for changes in the potential of SIVs to transmit to humans.},
	language = {en},
	urldate = {2017-02-15},
	journal = {Journal of Virology},
	author = {Liang, Huyi and Lam, Tommy Tsan-Yuk and Fan, Xiaohui and Chen, Xinchun and Zeng, Yu and Zhou, Ji and Duan, Lian and Tse, Maying and Chan, Chung-Hei and Li, Lifeng and Leung, Tak-Ying and Yip, Chun-Hung and Cheung, Chung-Lam and Zhou, Boping and Smith, David K. and Poon, Leo Lit-Man and Peiris, Malik and Guan, Yi and Zhu, Huachen},
	month = jul,
	year = {2014},
	pmid = {25008935},
	pages = {JVI.01327-14},
	file = {Full Text PDF:/Volumes/HOME/Zotero/storage/563P6IBZ/Liang et al. - 2014 - Expansion of genotypic diversity and establishment.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/T28KDTGK/JVI.01327-14.html:text/html}
}


@article{venkatesh_avian_2018,
	title = {Avian Influenza Viruses in Wild Birds: Virus Evolution in a Multihost Ecosystem},
	volume = {92},
	issn = {1098-5514},
	doi = {10.1128/JVI.00433-18},
	shorttitle = {Avian Influenza Viruses in Wild Birds},
	abstract = {Wild ducks and gulls are the major reservoirs for avian influenza A viruses ({AIVs}). The mechanisms that drive {AIV} evolution are complex at sites where various duck and gull species from multiple flyways breed, winter, or stage. The Republic of Georgia is located at the intersection of three migratory flyways: the Central Asian flyway, the East Africa/West Asia flyway, and the Black Sea/Mediterranean flyway. For six complete study years (2010 to 2016), we collected {AIV} samples from various duck and gull species that breed, migrate, and overwinter in Georgia. We found a substantial subtype diversity of viruses that varied in prevalence from year to year. Low-pathogenic {AIV} ({LPAIV}) subtypes included H1N1, H2N3, H2N5, H2N7, H3N8, H4N2, H6N2, H7N3, H7N7, H9N1, H9N3, H10N4, H10N7, H11N1, H13N2, H13N6, H13N8, and H16N3, and two highly pathogenic {AIVs} ({HPAIVs}) belonging to clade 2.3.4.4, H5N5 and H5N8, were found. Whole-genome phylogenetic trees showed significant host species lineage restriction for nearly all gene segments and significant differences in observed reassortment rates, as defined by quantification of phylogenetic incongruence, and in nucleotide sequence diversity for {LPAIVs} among different host species. Hemagglutinin clade 2.3.4.4 H5N8 viruses, which circulated in Eurasia during 2014 and 2015, did not reassort, but analysis after their subsequent dissemination during 2016 and 2017 revealed reassortment in all gene segments except {NP} and {NS}. Some virus lineages appeared to be unrelated to {AIVs} in wild bird populations in other regions, with maintenance of local {AIVs} in Georgia, whereas other lineages showed considerable genetic interrelationships with viruses circulating in other parts of Eurasia and Africa, despite relative undersampling in the area. {IMPORTANCE} Waterbirds (e.g., gulls and ducks) are natural reservoirs of avian influenza viruses ({AIVs}) and have been shown to mediate the dispersal of {AIVs} at intercontinental scales during seasonal migration. The segmented genome of influenza viruses enables viral {RNA} from different lineages to mix or reassort when two viruses infect the same host. Such reassortant viruses have been identified in most major human influenza pandemics and several poultry outbreaks. Despite their importance, we have only recently begun to understand {AIV} evolution and reassortment in their natural host reservoirs. This comprehensive study illustrates {AIV} evolutionary dynamics within a multihost ecosystem at a stopover site where three major migratory flyways intersect. Our analysis of this ecosystem over a 6-year period provides a snapshot of how these viruses are linked to global {AIV} populations. Understanding the evolution of {AIVs} in the natural host is imperative to mitigating both the risk of incursion into domestic poultry and the potential risk to mammalian hosts, including humans.},
	number = {15},
	journaltitle = {Journal of Virology},
	shortjournal = {J. Virol.},
	author = {Venkatesh, Divya and Poen, Marjolein J. and Bestebroer, Theo M. and Scheuer, Rachel D. and Vuong, Oanh and Chkhaidze, Mzia and Machablishvili, Anna and Mamuchadze, Jimsher and Ninua, Levan and Fedorova, Nadia B. and Halpin, Rebecca A. and Lin, Xudong and Ransier, Amy and Stockwell, Timothy B. and Wentworth, David E. and Kriti, Divya and Dutta, Jayeeta and van Bakel, Harm and Puranik, Anita and Slomka, Marek J. and Essen, Steve and Brown, Ian H. and Fouchier, Ron A. M. and Lewis, Nicola S.},
	date = {2018},
	pmid = {29769347},
	pmcid = {PMC6052287},
	keywords = {Animals, Genome, Viral, evolution, avian influenza, Birds, ecology, Ecosystem, Evolution, Molecular, influenza, Influenza A virus, Influenza in Birds, phylogenetics, Phylogeny, viruses}
}

@article{larsen_identification_2019,
	author = {Larsen, Frederik T. and Bed’Hom, Bertrand and Guldbrandtsen, Bernt and Dalgaard, Tina S.},
	title = {Identification and tissue-expression profiling of novel chicken c-type lectin-like domain containing proteins as potential targets for carbohydrate-based vaccine strategies},
	journaltitle = {Molecular Immunology},
	shortjournal = {Molecular Immunology},
	volume = {114},
	pages = {216--225},
	date = {2019-10-01},
	langid = {english},
	issn = {0161-5890},
	doi = {10.1016/j.molimm.2019.07.022},
	url = {http://www.sciencedirect.com/science/article/pii/S0161589019304407},
	urldate = {2020-05-19},
	keywords = {C-type lectin-like domain, Chicken, Chicken {NKC}, Containing proteins, Pattern-Recognition receptors, Vaccine design},
	abstract = {C-type lectin-like domain containing proteins ({CTLDcps}) mainly bind carbohydrate-based ligands, but also other ligands. {CTLDcps} are involved in several biological processes including cell adhesion, cell-cell interactions, and pathogen recognition. Pathogen recognition by myeloid cells, e.g. dendritic cells ({DCs}), can be facilitated through cell surface expressed {CTLDcps}. Cell surface expressed {CTLDcps} have been exploited in vaccine designs for specific targeting of human and mouse {DCs} using antibodies. In recent years, however, {DC} targeting using carbohydrate-based vaccines has gained interest due to low production cost, limited immunogenicity, and possibility of multivalent adjustment. In chicken, however, only a few {CTLDcps} have been identified. Identifying and annotating additional chicken {CTLDcps} ({chCTLDcps}) is needed to exploit carbohydrate-mediated {DC} targeting in chicken. Therefore, we searched the chicken {GRCg}6a assembly for novel {chCTLDcps}. We identified 28 {chCTLDcps} of which 10 had previously been described and also experimentally validated. {RNA}-seq and {RT}-{qPCR} confirmed {mRNA} expression of the remaining 18 identified {chCTLDcps}. A group of highly related {chCTLDcps}, moreover, was shown to be avian-specific and comprise novel members mapped to the proposed chicken natural killer gene complex. Two {chCTLDcps}, {chCLEC}17AL-A and {chCLEC}17AL-B, were found to share a recent common ancestor with {CLEC}17A. Putative mannose or fucose-binding sequence motifs, {EPN} and {WND}, were found in the {CTLD} of {chCLEC}17AL-A. Both contained intracellular internalisation and signalling sequence motifs. In conclusion, several {chCTLDcps} were identified and their expression confirmed. Both {chCLEC}17AL-A and -B showed promise as potential targets in carbohydrate-based chicken vaccine strategies. Determination of {DC}-specific expression of {chCLEC}17AL-A and -B, thus, might prove useful in chicken vaccinology.}
}

@article{GraPhlAn,
    title={Compact graphical representation of phylogenetic data and metadata with {GraPhlAn}},
    author={Asnicar, Francesco and Weingart, George and Tickle, Timothy L and Huttenhower, Curtis and Segata, Nicola},
    journal={PeerJ},
    volume={3},
    pages={e1029},
    year={2015},
    doi={10.7717/peerj.1029},
    url={https://peerj.com/articles/1029/},
    publisher={PeerJ Inc.}
}

@article{morgan2013HMP,
    title={Biodiversity and functional genomics in the human microbiome},
    author={Morgan, Xochitl C. and Segata, Nicola and Huttenhower, Curtis},
    journal={Trends in Genetics},
    volume={29},
    number={1},
    pages={51--58},
    doi={10.1016/J.TIG.2012.09.005},
    url={https://www.sciencedirect.com/science/article/pii/S016895251200145X},
    year={2013}
}

@article{Chowe:_2020,
    author = {Chow, Nancy A. and Mu{\~n}oz, Jos{\'e} F. and Gade, Lalitha and Berkow, Elizabeth L. and Li, Xiao and Welsh, Rory M. and Forsberg, Kaitlin and Lockhart, Shawn R. and Adam, Rodney and Alanio, Alexandre and Alastruey-Izquierdo, Ana and Althawadi, Sahar and Ara{\'u}z, Ana Bel{\'e}n and Ben-Ami, Ronen and Bharat, Amrita and Calvo, Belinda and Desnos-Ollivier, Marie and Escand{\'o}n, Patricia and Gardam, Dianne and Gunturu, Revathi and Heath, Christopher H. and Kurzai, Oliver and Martin, Ronny and Litvintseva, Anastasia P. and Cuomo, Christina A.},
    editor = {Butler, Geraldine and Nielsen, Kirsten},
    title = {Tracing the Evolutionary History and Global Expansion of {Candida} auris Using Population Genomic Analyses},
    volume = {11},
    number = {2},
    elocation-id = {e03364-19},
    year = {2020},
    doi = {10.1128/mBio.03364-19},
    publisher = {American Society for Microbiology},
    abstract = {Candida auris has emerged globally as a multidrug-resistant yeast that can spread via nosocomial transmission. An initial phylogenetic study of isolates from Japan, India, Pakistan, South Africa, and Venezuela revealed four populations (clades I, II, III, and IV) corresponding to these geographic regions. Since this description, C. auris has been reported in more than 30 additional countries. To trace this global emergence, we compared the genomes of 304 C. auris isolates from 19 countries on six continents. We found that four predominant clades persist across wide geographic locations. We observed phylogeographic mixing in most clades; clade IV, with isolates mainly from South America, demonstrated the strongest phylogeographic substructure. C. auris isolates from two clades with opposite mating types were detected contemporaneously in a single health care facility in Kenya. We estimated a Bayesian molecular clock phylogeny and dated the origin of each clade within the last 360 years; outbreak-causing clusters from clades I, III, and IV originated 36 to 38 years ago. We observed high rates of antifungal resistance in clade I, including four isolates resistant to all three major classes of antifungals. Mutations that contribute to resistance varied between the clades, with Y132F in ERG11 as the most widespread mutation associated with azole resistance and S639P in FKS1 for echinocandin resistance. Copy number variants in ERG11 predominantly appeared in clade III and were associated with fluconazole resistance. These results provide a global context for the phylogeography, population structure, and mechanisms associated with antifungal resistance in C. auris. IMPORTANCE In less than a decade, C. auris has emerged in health care settings worldwide; this species is capable of colonizing skin and causing outbreaks of invasive candidiasis. In contrast to other Candida species, C. auris is unique in its ability to spread via nosocomial transmission and its high rates of drug resistance. As part of the public health response, whole-genome sequencing has played a major role in characterizing transmission dynamics and detecting new C. auris introductions. Through a global collaboration, we assessed genome evolution of isolates of C. auris from 19 countries. Here, we described estimated timing of the expansion of each C. auris clade and of fluconazole resistance, characterized discrete phylogeographic population structure of each clade, and compared genome data to sensitivity measurements to describe how antifungal resistance mechanisms vary across the population. These efforts are critical for a sustained, robust public health response that effectively utilizes molecular epidemiology.},
    URL = {https://mbio.asm.org/content/11/2/e03364-19},
    eprint = {https://mbio.asm.org/content/11/2/e03364-19.full.pdf},
    journal = {mBio}
}

@article{RN46:_2015,
   author = {Wong, Vanessa K. and Baker, Stephen and Pickard, Derek J. and Parkhill, Julian and Page, Andrew J. and Feasey, Nicholas A. and Kingsley, Robert A. and Thomson, Nicholas R. and Keane, Jacqueline A. and Weill, François-Xavier and Edwards, David J. and Hawkey, Jane and Harris, Simon R. and Mather, Alison E. and Cain, Amy K. and Hadfield, James and Hart, Peter J. and Thieu, Nga Tran Vu and Klemm, Elizabeth J. and Glinos, Dafni A. and Breiman, Robert F. and Watson, Conall H. and Kariuki, Samuel and Gordon, Melita A. and Heyderman, Robert S. and Okoro, Chinyere and Jacobs, Jan and Lunguya, Octavie and Edmunds, W. John and Msefula, Chisomo and Chabalgoity, Jose A. and Kama, Mike and Jenkins, Kylie and Dutta, Shanta and Marks, Florian and Campos, Josefina and Thompson, Corinne and Obaro, Stephen and MacLennan, Calman A. and Dolecek, Christiane and Keddy, Karen H. and Smith, Anthony M. and Parry, Christopher M. and Karkey, Abhilasha and Mulholland, E. Kim and Campbell, James I. and Dongol, Sabina and Basnyat, Buddha and Dufour, Muriel and Bandaranayake, Don and Naseri, Take Toleafoa and Singh, Shalini Pravin and Hatta, Mochammad and Newton, Paul and Onsare, Robert S. and Isaia, Lupeoletalalei and Dance, David and Davong, Viengmon and Thwaites, Guy and Wijedoru, Lalith and Crump, John A. and De Pinna, Elizabeth and Nair, Satheesh and Nilles, Eric J. and Thanh, Duy Pham and Turner, Paul and Soeng, Sona and Valcanis, Mary and Powling, Joan and Dimovski, Karolina and Hogg, Geoff and Farrar, Jeremy and Holt, Kathryn E. and Dougan, Gordon},
   title = {Phylogeographical analysis of the dominant multidrug-resistant {H58} clade of {Salmonella} {Typhi} identifies inter- and intracontinental transmission events},
   journal = {Nature Genetics},
   volume = {47},
   number = {6},
   pages = {632--639},
   ISSN = {1546-1718},
   DOI = {10.1038/ng.3281},
   url = {https://doi.org/10.1038/ng.3281},
   year = {2015},
   type = {Journal Article}
}

@article{kostic2012genomic,
   title={Genomic analysis identifies association of {Fusobacterium} with colorectal carcinoma},
   author={Kostic, Aleksandar D and Gevers, Dirk and Pedamallu, Chandra Sekhar and Michaud, Monia and Duke, Fujiko and Earl, Ashlee M and Ojesina, Akinyemi I and Jung, Joonil and Bass, Adam J and Tabernero, Josep and others},
   journal={Genome Research},
   volume={22},
   number={2},
   pages={292--298},
   year={2012},
   publisher={Cold Spring Harbor Lab},
   doi={10.1101/gr.126573.111},
   url={https://genome.cshlp.org/content/22/2/292.long}
}

@article{He289,
   author = {He, Zhen and Gharaibeh, Raad Z and Newsome, Rachel C and Pope, Jillian L and Dougherty, Michael W and Tomkovich, Sarah and Pons, Benoit and Mirey, Gladys and Vignard, Julien and Hendrixson, David R and Jobin, Christian},
   title = {{Campylobacter} jejuni promotes colorectal tumorigenesis through the action of cytolethal distending toxin},
   volume = {68},
   number = {2},
   pages = {289--300},
   year = {2019},
   doi = {10.1136/gutjnl-2018-317200},
   publisher = {BMJ Publishing Group},
   abstract = {Objective Campylobacter jejuni produces a genotoxin, cytolethal distending toxin (CDT), which has DNAse activity and causes DNA double-strand breaks. Although C. jejuni infection has been shown to promote intestinal inflammation, the impact of this bacterium on carcinogenesis has never been examined. Design Germ-free (GF) ApcMin/+ mice, fed with 1\% dextran sulfate sodium, were used to test tumorigenesis potential of CDT-producing C. jejuni. Cells and enteroids were exposed to bacterial lysates to determine DNA damage capacity via γH2AX immunofluorescence, comet assay and cell cycle assay. To examine the interplay of CDT-producing C. jejuni, gut microbiome and host in tumorigenesis, colonic RNA-sequencing and faecal 16S rDNA sequencing were performed. Rapamycin was administrated to investigate the prevention of CDT-producing C. jejuni-induced tumorigenesis. Results GF ApcMin/+ mice colonised with human clinical isolate C. jejuni 81{\textendash}176 developed significantly more and larger tumours when compared with uninfected mice. C. jejuni with a mutated cdtB subunit, mutcdtB, attenuated C. jejuni-induced tumorigenesis in vivo and decreased DNA damage response in cells and enteroids. C. jejuni infection induced expression of hundreds of colonic genes, with 22 genes dependent on the presence of cdtB. The C. jejuni-infected group had a significantly different microbial gene expression profile compared with the mutcdtB group as shown by metatranscriptomic data, and different microbial communities as measured by 16S rDNA sequencing. Finally, rapamycin could diminish the tumorigenic capability of C. jejuni. Conclusion Human clinical isolate C. jejuni 81{\textendash}176 promotes colorectal cancer and induces changes in microbial composition and transcriptomic responses, a process dependent on CDT production.},
   issn = {0017-5749},
   URL = {https://gut.bmj.com/content/68/2/289},
   eprint = {https://gut.bmj.com/content/68/2/289.full.pdf},
   journal = {Gut}
}

@article{wu2013dysbiosis,
   title={Dysbiosis signature of fecal microbiota in colorectal cancer patients},
   author={Wu, Na and Yang, Xi and Zhang, Ruifen and Li, Jun and Xiao, Xue and Hu, Yongfei and Chen, Yanfei and Yang, Fengling and Lu, Na and Wang, Zhiyun and others},
   journal={Microbial Ecology},
   volume={66},
   number={2},
   pages={462--470},
   year={2013},
   doi={10.1007/s00248-013-0245-9},
   url={https://link.springer.com/article/10.1007/s00248-013-0245-9},
   publisher={Springer}
}

@article{amer2017microbiome,
   title={The microbiome of potentially malignant oral leukoplakia exhibits enrichment for {Fusobacterium}, {Leptotrichia}, {Campylobacter}, and {Rothia} species},
   author={Amer, Abdrazak and Galvin, Sheila and Healy, Claire M and Moran, Gary P},
   journal={Frontiers in Microbiology},
   volume={8},
   pages={2391},
   year={2017},
   doi={10.3389/fmicb.2017.02391},
   url={https://doi.org/10.3389/fmicb.2017.02391},
   publisher={Frontiers}
}

@article{reroot_review,
    author = {Czech, Lucas and Huerta-Cepas, Jaime and Stamatakis, Alexandros},
    title = {A Critical Review on the Use of Support Values in Tree Viewers and Bioinformatics Toolkits},
    journal = {Molecular Biology and Evolution},
    volume = {34},
    number = {6},
    pages = {1535--1542},
    year = {2017},
    month = mar,
    abstract = {Phylogenetic trees are routinely visualized to present and interpret the evolutionary relationships of species. Most empirical evolutionary data studies contain a visualization of the inferred tree with branch support values. Ambiguous semantics in tree file formats can lead to erroneous tree visualizations and therefore to incorrect interpretations of phylogenetic analyses. Here, we discuss problems that arise when displaying branch values on trees after rerooting. Branch values are typically stored as node labels in the widely-used Newick tree format. However, such values are attributes of branches. Storing them as node labels can therefore yield errors when rerooting trees. This depends on the mostly implicit semantics that tools deploy to interpret node labels. We reviewed ten tree viewers and ten bioinformatics toolkits that can display and reroot trees. We found that 14 out of 20 of these tools do not permit users to select the semantics of node labels. Thus, unaware users might obtain incorrect results when rooting trees. We illustrate such incorrect mappings for several test cases and real examples taken from the literature. This review has already led to improvements in eight tools. We suggest tools should provide options that explicitly force users to define the semantics of node labels.},
    issn = {0737-4038},
    doi = {10.1093/molbev/msx055},
    url = {https://doi.org/10.1093/molbev/msx055},
    eprint = {https://academic.oup.com/mbe/article-pdf/34/6/1535/17942399/msx055.pdf},
}
